This is the formal analysis file for the “The Nature of Reasonableness” by by Kevin Tobia, Ivar R. Hannikainen, David Kamper, Guilherme Almeida, Piotr Bystranowski, Niek Strohmaier, Vilius Dranseika, Markus Kneer, Fernando Aguiar, Kristina Dolinina, Bartosz Janik, Eglė Lauraitytė, Alice Liefgreen, Maciej Próchnicki, Alejandro Rosas, Vivek Kumar Shukla, and Noel Struchiner (2025, Stanford Journal of International Law).

Introduction and Loading of Files

# Load Libraries

library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ forcats   1.0.0     ✔ readr     2.1.5
## ✔ ggplot2   3.5.1     ✔ stringr   1.5.1
## ✔ lubridate 1.9.4     ✔ tibble    3.2.1
## ✔ purrr     1.0.2     ✔ tidyr     1.3.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(stats)
library(ggplot2)
library(ggthemes)
library(corrplot)
## corrplot 0.95 loaded
library(gridExtra)
## 
## Attaching package: 'gridExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     combine
library(kableExtra)
## 
## Attaching package: 'kableExtra'
## 
## The following object is masked from 'package:dplyr':
## 
##     group_rows
library(effects)
## Loading required package: carData
## lattice theme set by effectsTheme()
## See ?effectsTheme for details.
DATA_PATH <- "/Users/dgkamper/Library/CloudStorage/GoogleDrive-dgkamper@gmail.com/My Drive/DGK Lab/Collaborations/Language and Law Laboratory/Analysis/LegalResonablenessAnalysis/Data_all_numeric.csv"

Initial Data Loading and Preparation

# Read the data from specified path
Data_All <- read.csv(DATA_PATH, stringsAsFactors = FALSE)

# Verify data structure
glimpse(Data_All)
## Rows: 2,426
## Columns: 42
## $ q1             <dbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2…
## $ q2             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q3             <dbl> 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8, 8…
## $ q4             <dbl> 2500, 2500, 2500, 2500, 2500, 2500, 2500, 2500, 2500, 2…
## $ m              <dbl> 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, 120, …
## $ q6             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q7             <dbl> 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20, 20,…
## $ q8             <dbl> 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, 100, …
## $ q9             <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q10            <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
## $ q11            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q12            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q13            <dbl> 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,…
## $ q14            <chr> "30", "30", "30", "30", "30", "30", "30", "30", "30", "…
## $ q15            <dbl> 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25, 25,…
## $ q16            <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5…
## $ q17            <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
## $ q18            <int> 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12, 12,…
## $ q19            <int> 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15, 15,…
## $ q20            <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q21            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q22            <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ q23            <dbl> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ q24            <dbl> 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5…
## $ q25            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q26            <dbl> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…
## $ q27            <dbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0…
## $ q28            <dbl> 5, 5, 10, 50, 20, 5, 5, 10, 25, 60, 100, 80, 5, 20, 10,…
## $ q29            <dbl> 100, 50, 2, 25, 100, 100, 5, 50, 100, 100, 100, 100, 15…
## $ q30            <dbl> 3, 4, 2, 1, 6, 1, 2, 12, 26, 5, 0, 1, 8, 2, 4, 3, 4, 2,…
## $ q31            <dbl> 20, 750, 2, 12, 25, 0, 3, 100, 10, 30, 250, 50, 50, 10,…
## $ q32            <dbl> 5, 1, 2, 48, 24, 24, 55, 24, 24, 24, 24, 48, 24, 48, 12…
## $ q33            <chr> "0", "10", "2", "25", "4", "8", "2", "4", "4", "3.2", "…
## $ q34            <dbl> 95, 1, 2, 15, 50, 0, 5, 0, 95, 85, 100, 99, 50, 30, 15,…
## $ age            <int> 18, 47, 28, 30, NA, NA, 30, 50, 47, 33, 0, 0, 30, 43, 4…
## $ Gender         <chr> "Male", "Male", "Male", "Male", "Female", "Female", "Ma…
## $ ID             <chr> "usa1", "usa2", "usa3", "usa4", "usa5", "usa6", "usa7",…
## $ Country_name   <chr> "USA", "USA", "USA", "USA", "USA", "USA", "USA", "USA",…
## $ Condition_name <chr> "Reasonable", "Reasonable", "Reasonable", "Reasonable",…
## $ ID.1           <chr> "usa1", "usa2", "usa3", "usa4", "usa5", "usa6", "usa7",…
## $ Country        <int> 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1…
## $ Condition      <int> 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3…

Exclude participants whose answer to Q19 does not equal 15 [comprehension/attention question].

# Split data by condition
Data_All_Average <- Data_All %>% filter(Condition_name == "Average")
Data_All_Ideal <- Data_All %>% filter(Condition_name == "Ideal")
Data_All_Reasonable <- Data_All %>% filter(Condition_name == "Reasonable")

# Exclude participants whose answer to Q19 does not equal 15 [comprehension/attention question].
Data_All_Average_Pass1 <- Data_All_Average %>% filter(q19 == 15)
Data_All_Ideal_Pass1 <- Data_All_Ideal %>% filter(q19 == 15)
Data_All_Reasonable_Pass1 <- Data_All_Reasonable %>% filter(q19 == 15)

# Print participant counts
condition_counts <- data.frame(
  Condition = c("Average", "Ideal", "Reasonable"),
  Original_Count = c(nrow(Data_All_Average), nrow(Data_All_Ideal), nrow(Data_All_Reasonable)),
  After_Attention_Check = c(nrow(Data_All_Average_Pass1), nrow(Data_All_Ideal_Pass1), nrow(Data_All_Reasonable_Pass1)),
  Excluded_Count = c(nrow(Data_All_Average) - nrow(Data_All_Average_Pass1), 
                     nrow(Data_All_Ideal) - nrow(Data_All_Ideal_Pass1),
                     nrow(Data_All_Reasonable) - nrow(Data_All_Reasonable_Pass1)),
  Exclusion_Rate = c((nrow(Data_All_Average) - nrow(Data_All_Average_Pass1))/nrow(Data_All_Average),
                     (nrow(Data_All_Ideal) - nrow(Data_All_Ideal_Pass1))/nrow(Data_All_Ideal),
                     (nrow(Data_All_Reasonable) - nrow(Data_All_Reasonable_Pass1))/nrow(Data_All_Reasonable))
)

# Format the exclusion rate as percentage
condition_counts$Exclusion_Rate <- scales::percent(condition_counts$Exclusion_Rate)

# Display the participant counts table
kable(condition_counts, caption = "Participant Counts Before and After Attention Check") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Participant Counts Before and After Attention Check
Condition Original_Count After_Attention_Check Excluded_Count Exclusion_Rate
Average 815 793 22 2.70%
Ideal 825 800 25 3.03%
Reasonable 786 758 28 3.56%
# Define the question columns - ensuring they actually exist in the data
question_cols <- c("q1", "q2", "q3", "q4", "m", "q6", "q7", "q8", "q9", "q10", 
                  "q11", "q12", "q13", "q14", "q15", "q16", "q17", "q18", 
                  "q20", "q21", "q22", "q23", "q24", "q25", "q26", "q27", 
                  "q28", "q29", "q30", "q31", "q32", "q33", "q34")

# Check if all question columns exist in the data
existing_cols <- question_cols[question_cols %in% colnames(Data_All_Average_Pass1)]
missing_cols <- question_cols[!question_cols %in% colnames(Data_All_Average_Pass1)]

print("Existing question columns:")
## [1] "Existing question columns:"
print(existing_cols)
##  [1] "q1"  "q2"  "q3"  "q4"  "m"   "q6"  "q7"  "q8"  "q9"  "q10" "q11" "q12"
## [13] "q13" "q14" "q15" "q16" "q17" "q18" "q20" "q21" "q22" "q23" "q24" "q25"
## [25] "q26" "q27" "q28" "q29" "q30" "q31" "q32" "q33" "q34"
print("Missing question columns:")
## [1] "Missing question columns:"
print(missing_cols)
## character(0)

Check Number of Participants per Condition

# Continue with only the existing columns
question_cols <- existing_cols

# Convert columns to numeric - using a safer approach
Data_All_Average_Pass1 <- Data_All_Average_Pass1 %>%
  mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))

Data_All_Ideal_Pass1 <- Data_All_Ideal_Pass1 %>%
  mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
## Warning: There were 2 warnings in `mutate()`.
## The first warning was:
## ℹ In argument: `across(all_of(question_cols), ~as.numeric(as.character(.)))`.
## Caused by warning:
## ! NAs introduced by coercion
## ℹ Run `dplyr::last_dplyr_warnings()` to see the 1 remaining warning.
Data_All_Reasonable_Pass1 <- Data_All_Reasonable_Pass1 %>%
  mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))

# Verify the number of participants in each condition
cat("Ideal condition:", nrow(Data_All_Ideal_Pass1), "participants\n")
## Ideal condition: 800 participants
cat("Average condition:", nrow(Data_All_Average_Pass1), "participants\n")
## Average condition: 793 participants
cat("Reasonable condition:", nrow(Data_All_Reasonable_Pass1), "participants\n")
## Reasonable condition: 758 participants

Analysis 1: Overall results, mean ratings [following Bear & Knobe 2016]

Compute the mean rating and filter data

For any response that is greater than 3 standard deviations from that question mean, exclude that response.

# Function to filter data based on 3 SD criterion
filter_data <- function(data, cols) {
  # Pre-allocate a list to track excluded participants for each question
  exclusions_by_question <- list()
  
  # Original count
  original_count <- nrow(data)
  
  result <- data
  for (col in cols) {
    if (col %in% colnames(data)) {
      # Calculate mean and standard deviation
      avg <- mean(result[[col]], na.rm = TRUE)
      s <- sd(result[[col]], na.rm = TRUE)
      
      # Identify outliers
      outlier_indices <- which(!is.na(result[[col]]) & abs(result[[col]] - avg) > 3 * s)
      exclusions_by_question[[col]] <- length(outlier_indices)
      
      # Filter out outliers
      result <- result %>%
        filter(is.na(!!sym(col)) | abs(!!sym(col) - avg) <= 3 * s)
    }
  }
  
  # Final count
  final_count <- nrow(result)
  
  return(list(
    filtered_data = result,
    original_count = original_count,
    final_count = final_count,
    excluded_count = original_count - final_count,
    exclusion_rate = (original_count - final_count) / original_count,
    exclusions_by_question = exclusions_by_question
  ))
}

# Apply filtering and track exclusion counts
avg_filter_results <- filter_data(Data_All_Average_Pass1, question_cols)
ideal_filter_results <- filter_data(Data_All_Ideal_Pass1, question_cols)
reasonable_filter_results <- filter_data(Data_All_Reasonable_Pass1, question_cols)

# Display outlier exclusion summary
outlier_summary <- data.frame(
  Condition = c("Average", "Ideal", "Reasonable"),
  Original_Count = c(avg_filter_results$original_count, 
                    ideal_filter_results$original_count, 
                    reasonable_filter_results$original_count),
  After_Outlier_Removal = c(avg_filter_results$final_count, 
                           ideal_filter_results$final_count, 
                           reasonable_filter_results$final_count),
  Excluded_Count = c(avg_filter_results$excluded_count, 
                    ideal_filter_results$excluded_count, 
                    reasonable_filter_results$excluded_count),
  Exclusion_Rate = c(avg_filter_results$exclusion_rate,
                    ideal_filter_results$exclusion_rate,
                    reasonable_filter_results$exclusion_rate)
)

# Format the exclusion rate as percentage
outlier_summary$Exclusion_Rate <- scales::percent(outlier_summary$Exclusion_Rate)

# Display the outlier exclusion summary table
kable(outlier_summary, caption = "Participant Counts Before and After Outlier Removal") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Participant Counts Before and After Outlier Removal
Condition Original_Count After_Outlier_Removal Excluded_Count Exclusion_Rate
Average 793 609 184 23.20%
Ideal 800 608 192 24.00%
Reasonable 758 720 38 5.01%
# Extract the filtered data
Data_All_Average_Filtered <- avg_filter_results$filtered_data
Data_All_Ideal_Filtered <- ideal_filter_results$filtered_data
Data_All_Reasonable_Filtered <- reasonable_filter_results$filtered_data

# Calculate means after excluding outliers
filtered_column_means_average <- sapply(Data_All_Average_Filtered[, question_cols], mean, na.rm = TRUE)
filtered_column_means_ideal <- sapply(Data_All_Ideal_Filtered[, question_cols], mean, na.rm = TRUE)
filtered_column_means_reasonable <- sapply(Data_All_Reasonable_Filtered[, question_cols], mean, na.rm = TRUE)

# Create a dataframe for the means
means_df <- data.frame(
  Question = names(filtered_column_means_average),
  Average = filtered_column_means_average,
  Ideal = filtered_column_means_ideal,
  Reasonable = filtered_column_means_reasonable
)

# Display the means for each question
kable(means_df, caption = "Mean Values for Each Question by Condition") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Mean Values for Each Question by Condition
Question Average Ideal Reasonable
q1 q1 4.717079 2.423138 2.000000
q2 q2 10.120066 3.508487 0.000000
q3 q3 4.425497 6.945092 8.000000
q4 q4 1932.197655 1647.105983 2500.000000
m m 26.377667 41.317647 120.000000
q6 q6 27.802562 6.301167 0.000000
q7 q7 19.770132 6.589167 20.000000
q8 q8 8.612893 18.018303 100.000000
q9 q9 5.765728 11.359191 0.000000
q10 q10 5032.678623 8.432998 3.000000
q11 q11 5050.380471 7079.111932 0.000000
q12 q12 38.078602 8.936772 0.000000
q13 q13 59.506633 20.380235 25.000000
q14 q14 13.284132 3.547840 30.000000
q15 q15 11.635323 14.586236 25.000000
q16 q16 7.638550 7.958609 5.000000
q17 q17 208.070957 1.675530 3.000000
q18 q18 15.521523 9.793333 12.000000
q20 q20 26.018242 5.932231 0.000000
q21 q21 10.178713 8.181063 0.000000
q22 q22 13.893401 10.539496 1.000000
q23 q23 4.134615 4.432161 1.000000
q24 q24 46.867638 47.390252 5.000000
q25 q25 8980.698978 2641.582131 0.000000
q26 q26 12.584312 4.116415 3.000000
q27 q27 28.446488 8.343697 0.000000
q28 q28 15.162038 26.797274 22.767883
q29 q29 54.365320 72.998331 67.816395
q30 q30 32.157095 4.830236 7.575977
q31 q31 160.200000 58.560861 136.542155
q32 q32 44.029362 55.881313 44.316547
q33 q33 7.452260 3.735182 5.003173
q34 q34 53.374080 24.978115 41.688666
# Create a comparison table of means for plotting
means_long <- means_df %>%
  pivot_longer(cols = c(Average, Ideal, Reasonable), 
               names_to = "Condition", 
               values_to = "Mean")

Convert mean responses to a natural log scale.

## Convert mean responses to a natural log scale.
log_filtered_column_means_average <- log(filtered_column_means_average)
log_filtered_column_means_ideal <- log(filtered_column_means_ideal)
log_filtered_column_means_reasonable <- log(filtered_column_means_reasonable)

# Check if we have any zeros before taking logs
print("Checking for zeros in means:")
## [1] "Checking for zeros in means:"
print("Zeros in average means:")
## [1] "Zeros in average means:"
print(names(filtered_column_means_average[filtered_column_means_average == 0]))
## character(0)
print("Zeros in ideal means:")
## [1] "Zeros in ideal means:"
print(names(filtered_column_means_ideal[filtered_column_means_ideal == 0]))
## character(0)
print("Zeros in reasonable means:")
## [1] "Zeros in reasonable means:"
print(names(filtered_column_means_reasonable[filtered_column_means_reasonable == 0]))
## [1] "q2"  "q6"  "q9"  "q11" "q12" "q20" "q21" "q25" "q27"

Convert mean responses to a natural log scale (done more safely accounting zeros in reasonable means)

safe_log <- function(x) {
  # Replace zeros with a small constant
  x[x == 0] <- 0.01
  log(x)
}

# Take logs 
log_filtered_column_means_average <- safe_log(filtered_column_means_average)
log_filtered_column_means_ideal <- safe_log(filtered_column_means_ideal)
log_filtered_column_means_reasonable <- safe_log(filtered_column_means_reasonable)

# Create a dataframe for the log means
log_means_df <- data.frame(
  Question = names(log_filtered_column_means_average),
  Average = log_filtered_column_means_average,
  Ideal = log_filtered_column_means_ideal,
  Reasonable = log_filtered_column_means_reasonable
)

Conduct three regressions and record AIC

# Prepare data for regression
dataforAICMeans <- data.frame(
  Reasonable = log_filtered_column_means_reasonable,
  Average = log_filtered_column_means_average,
  Ideal = log_filtered_column_means_ideal
)

# Model I: Average predicts reasonable
Model1Means <- lm(Reasonable ~ Average, data = dataforAICMeans)
AIC1Means <- AIC(Model1Means)
r2_model1 <- summary(Model1Means)$r.squared

# Model II: Ideal predicts reasonable
Model2Means <- lm(Reasonable ~ Ideal, data = dataforAICMeans)
AIC2Means <- AIC(Model2Means)
r2_model2 <- summary(Model2Means)$r.squared

# Model III: Both average and ideal predict reasonable
Model3Means <- lm(Reasonable ~ Average + Ideal, data = dataforAICMeans)
AIC3Means <- AIC(Model3Means)
r2_model3 <- summary(Model3Means)$r.squared

# Create a regression summary dataframe
regression_summary <- data.frame(
  Model = c("Average predicts Reasonable", 
            "Ideal predicts Reasonable", 
            "Average + Ideal predict Reasonable"),
  AIC = c(AIC1Means, AIC2Means, AIC3Means),
  R_squared = c(r2_model1, r2_model2, r2_model3)
)

# Display the regression summary table
kable(regression_summary, caption = "Regression Model Comparison") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Regression Model Comparison
Model AIC R_squared
Average predicts Reasonable 184.0899 0.0010386
Ideal predicts Reasonable 183.9865 0.0041640
Average + Ideal predict Reasonable 185.5350 0.0176975

Plots and Reporting

# Identify the best model based on AIC
best_model <- which.min(c(AIC1Means, AIC2Means, AIC3Means))
best_model_name <- c("Average → Reasonable", "Ideal → Reasonable", "Hybrid Model")[best_model]

# Create separate plots for Model 1 and Model 2
plot1 <- ggplot(dataforAICMeans, aes(x = Average, y = Reasonable)) +
  geom_point(size = 3, alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  labs(title = "Model 1: Average Predicts Reasonable",
       subtitle = paste("AIC =", round(AIC1Means, 2), ", R² =", round(r2_model1, 3)),
       x = "Log Average Rating",
       y = "Log Reasonable Rating") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12)
  )

plot2 <- ggplot(dataforAICMeans, aes(x = Ideal, y = Reasonable)) +
  geom_point(size = 3, alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(title = "Model 2: Ideal Predicts Reasonable",
       subtitle = paste("AIC =", round(AIC2Means, 2), ", R² =", round(r2_model2, 3)),
       x = "Log Ideal Rating",
       y = "Log Reasonable Rating") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12)
  )

# Create Model 3 visualization - effects of Average while holding Ideal constant
new_data <- expand.grid(
  Average = seq(min(dataforAICMeans$Average), max(dataforAICMeans$Average), length.out = 100),
  Ideal = median(dataforAICMeans$Ideal)  # Hold Ideal constant at median
)
new_data$predicted_reasonable <- predict(Model3Means, newdata = new_data)

plot3 <- ggplot(dataforAICMeans, aes(x = Average, y = Reasonable)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_line(data = new_data, aes(y = predicted_reasonable), color = "blue", size = 1) +
  labs(title = "Model 3: Effect of Average",
       subtitle = "Controlling for Ideal (held at median)",
       x = "Log Average Rating",
       y = "Log Reasonable Rating") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12)
  )
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
# Create Model 3 visualization - effects of Ideal while holding Average constant
new_data2 <- expand.grid(
  Average = median(dataforAICMeans$Average),  # Hold Average constant at median
  Ideal = seq(min(dataforAICMeans$Ideal), max(dataforAICMeans$Ideal), length.out = 100)
)
new_data2$predicted_reasonable <- predict(Model3Means, newdata = new_data2)

plot4 <- ggplot(dataforAICMeans, aes(x = Ideal, y = Reasonable)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_line(data = new_data2, aes(y = predicted_reasonable), color = "red", size = 1) +
  labs(title = "Model 3: Effect of Ideal",
       subtitle = "Controlling for Average (held at median)",
       x = "Log Ideal Rating",
       y = "Log Reasonable Rating") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12)
  )

# Display plots individually 
print(plot1)
## `geom_smooth()` using formula = 'y ~ x'

print(plot2)
## `geom_smooth()` using formula = 'y ~ x'

print(plot3)

print(plot4)

# Create an explanatory figure showing the relationship between means
means_summary <- data.frame(
  Question = 1:length(filtered_column_means_average),
  Average = filtered_column_means_average,
  Ideal = filtered_column_means_ideal,
  Reasonable = filtered_column_means_reasonable
) %>%
  # Take a subset of questions for clearer visualization
  slice(1:10)

# Convert to long format for plotting
means_summary_long <- means_summary %>%
  pivot_longer(cols = c(Average, Ideal, Reasonable),
               names_to = "Rating_Type",
               values_to = "Value") %>%
  mutate(Rating_Type = factor(Rating_Type, levels = c("Average", "Reasonable", "Ideal")))

# Create an explanatory plot
explanatory_plot <- ggplot(means_summary_long, aes(x = Rating_Type, y = Value, group = Question, color = factor(Question))) +
  geom_line(size = 1) +
  geom_point(size = 3) +
  labs(title = "Relationship Between Average, Reasonable, and Ideal Ratings",
       subtitle = "Example of first 10 questions showing how Reasonable often falls between Average and Ideal",
       x = "Rating Type",
       y = "Value",
       color = "Question") +
  theme_minimal() +
  theme(
    legend.position = "none",
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12)
  )

# Display and save the explanatory plot
print(explanatory_plot)

ggsave("reasonable_between_average_ideal.png", explanatory_plot, width = 8, height = 6, dpi = 300)

# Create table of model coefficients
get_model_coefs <- function(model) {
  coefs <- summary(model)$coefficients
  data.frame(
    Term = rownames(coefs),
    Estimate = coefs[,1],
    Std_Error = coefs[,2],
    t_value = coefs[,3],
    p_value = coefs[,4]
  )
}

# Format p-values with asterisks for significance levels
format_pvalue <- function(p) {
  if (p < 0.001) return("< 0.001 ***")
  if (p < 0.01) return(paste0(round(p, 3), " **"))
  if (p < 0.05) return(paste0(round(p, 3), " *"))
  return(as.character(round(p, 3)))
}

# Get and format model coefficients
model3_coefs <- get_model_coefs(Model3Means)
model3_coefs$p_value <- sapply(model3_coefs$p_value, format_pvalue)

# Display the hybrid model coefficients
kable(model3_coefs, caption = "Hybrid Model Coefficients (Average + Ideal → Reasonable)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Hybrid Model Coefficients (Average + Ideal → Reasonable)
Term Estimate Std_Error t_value p_value
(Intercept) (Intercept) 0.8034340 1.3519113 0.5942949 0.557
Average Average -0.3039662 0.4728053 -0.6428994 0.525
Ideal Ideal 0.3583289 0.5023663 0.7132821 0.481
# Store analysis results (simplified)
analysis_1_results <- list(
  regression_models = list(
    model1 = Model1Means,
    model2 = Model2Means,
    model3 = Model3Means
  ),
  regression_summary = regression_summary,
  aic_values = c(AIC1 = AIC1Means, AIC2 = AIC2Means, AIC3 = AIC3Means)
)

Analysis 2: Overall results, intermediacy [following Bear & Knobe 2016]

Determine if each mean reasonableness rating is on the “ideal side” of average.

# Function to check if reasonable is on the "ideal side" of average.
# That is, if the reasonable rating is as close or closer to the ideal than to the average.
is_ideal_side <- function(average, ideal, reasonable) {
  ifelse(abs(reasonable - ideal) <= abs(reasonable - average), 1, 0)
}

# Apply the function to raw means
ideal_side_vector <- mapply(is_ideal_side,
                           filtered_column_means_average,
                           filtered_column_means_ideal,
                           filtered_column_means_reasonable)

# Apply the function to log-transformed means
ideal_side_vector_log <- mapply(is_ideal_side,
                               log_filtered_column_means_average,
                               log_filtered_column_means_ideal,
                               log_filtered_column_means_reasonable)

# Count how many questions have reasonableness on the "ideal side" of average
count_ideal_side <- sum(ideal_side_vector)
count_ideal_side_log <- sum(ideal_side_vector_log)

# Conduct binomial tests to assess whether the proportion is greater than 50%
binomial_result_ideal <- binom.test(count_ideal_side, length(question_cols), p = 0.5, 
                                   alternative = "greater")
binomial_result_ideal_log <- binom.test(count_ideal_side_log, length(question_cols), p = 0.5, 
                                      alternative = "greater")

# Create a summary table for the binomial test results
ideal_side_summary <- data.frame(
  Data_Type = c("Raw Means", "Log Means"),
  On_Ideal_Side_Count = c(count_ideal_side, count_ideal_side_log),
  Total_Questions = c(length(question_cols), length(question_cols)),
  Proportion = c(count_ideal_side/length(question_cols), 
                count_ideal_side_log/length(question_cols)),
  p_value = c(binomial_result_ideal$p.value, binomial_result_ideal_log$p.value)
)

# Format proportions and p-values for display
ideal_side_summary$Proportion <- scales::percent(ideal_side_summary$Proportion)
ideal_side_summary$p_value <- sapply(ideal_side_summary$p_value, format_pvalue)

# Display the binomial test results table
kable(ideal_side_summary, 
      caption = "Binomial Test Results: Is Reasonableness on the Ideal Side of Average?") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Binomial Test Results: Is Reasonableness on the Ideal Side of Average?
Data_Type On_Ideal_Side_Count Total_Questions Proportion p_value
Raw Means 22 33 67% 0.04 *
Log Means 22 33 67% 0.04 *

Determine if each mean reasonableness rating is on the “average side” of ideal.

# Function to check if reasonable is on the "average side" of ideal.
# That is, if the reasonable rating is as close or closer to the average than to the ideal.
is_average_side <- function(average, ideal, reasonable) {
  ifelse(abs(reasonable - average) <= abs(reasonable - ideal), 1, 0)
}

# Apply the function to raw means
average_side_vector <- mapply(is_average_side,
                             filtered_column_means_average,
                             filtered_column_means_ideal,
                             filtered_column_means_reasonable)

# Apply the function to log-transformed means
average_side_vector_log <- mapply(is_average_side,
                                 log_filtered_column_means_average,
                                 log_filtered_column_means_ideal,
                                 log_filtered_column_means_reasonable)

# Count how many questions have reasonableness on the "average side" of ideal
count_average_side <- sum(average_side_vector)
count_average_side_log <- sum(average_side_vector_log)

# Conduct binomial tests to assess whether the proportion is greater than 50%
binomial_result_avg_side <- binom.test(count_average_side, length(question_cols), p = 0.5, 
                                      alternative = "greater")
binomial_result_avg_side_log <- binom.test(count_average_side_log, length(question_cols), p = 0.5, 
                                         alternative = "greater")

# Create a summary table for the binomial test results
average_side_summary <- data.frame(
  Data_Type = c("Raw Means", "Log Means"),
  On_Average_Side_Count = c(count_average_side, count_average_side_log),
  Total_Questions = c(length(question_cols), length(question_cols)),
  Proportion = c(count_average_side/length(question_cols), 
                count_average_side_log/length(question_cols)),
  p_value = c(binomial_result_avg_side$p.value, binomial_result_avg_side_log$p.value)
)

# Format proportions and p-values for display
average_side_summary$Proportion <- scales::percent(average_side_summary$Proportion)
average_side_summary$p_value <- sapply(average_side_summary$p_value, format_pvalue)

# Display the binomial test results table
kable(average_side_summary, 
      caption = "Binomial Test Results: Is Reasonableness on the Average Side of Ideal?") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Binomial Test Results: Is Reasonableness on the Average Side of Ideal?
Data_Type On_Average_Side_Count Total_Questions Proportion p_value
Raw Means 11 33 33% 0.982
Log Means 11 33 33% 0.982

Determine if reasonableness is BOTH on the ideal side of average AND on the average side of ideal.

# Function to check if reasonable falls between average and ideal.
# This is a robust check: if (reasonable - average) and (reasonable - ideal) have opposite signs or are zero,
# then reasonable is between (or equal to one of) the anchors.
is_both_sides <- function(average, ideal, reasonable) {
  ifelse((reasonable - average) * (reasonable - ideal) <= 0, 1, 0)
}

# Apply the function to raw means
both_sides_vector <- mapply(is_both_sides,
                           filtered_column_means_average,
                           filtered_column_means_ideal,
                           filtered_column_means_reasonable)

# Apply the function to log-transformed means
both_sides_vector_log <- mapply(is_both_sides,
                               log_filtered_column_means_average,
                               log_filtered_column_means_ideal,
                               log_filtered_column_means_reasonable)

# Count how many questions have reasonableness on both sides
count_both_sides <- sum(both_sides_vector)
count_both_sides_log <- sum(both_sides_vector_log)

# Conduct binomial tests with 1/3 as the chance rate (as specified in the analysis plan)
binomial_result_both_sides <- binom.test(count_both_sides, length(question_cols), p = 1/3, 
                                        alternative = "two.sided")
binomial_result_both_sides_log <- binom.test(count_both_sides_log, length(question_cols), p = 1/3, 
                                           alternative = "two.sided")

# Create a summary table for the binomial test results
both_sides_summary <- data.frame(
  Data_Type = c("Raw Means", "Log Means"),
  On_Both_Sides_Count = c(count_both_sides, count_both_sides_log),
  Total_Questions = c(length(question_cols), length(question_cols)),
  Proportion = c(count_both_sides/length(question_cols), 
                count_both_sides_log/length(question_cols)),
  Expected_Chance = c(1/3, 1/3),
  p_value = c(binomial_result_both_sides$p.value, binomial_result_both_sides_log$p.value)
)

# Format proportions and p-values for display
both_sides_summary$Proportion <- scales::percent(both_sides_summary$Proportion)
both_sides_summary$Expected_Chance <- scales::percent(both_sides_summary$Expected_Chance)
both_sides_summary$p_value <- sapply(both_sides_summary$p_value, format_pvalue)

# Display the binomial test results table
kable(both_sides_summary, 
      caption = "Binomial Test Results: Is Reasonableness Between Average and Ideal?") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Binomial Test Results: Is Reasonableness Between Average and Ideal?
Data_Type On_Both_Sides_Count Total_Questions Proportion Expected_Chance p_value
Raw Means 10 33 30% 33% 0.854
Log Means 10 33 30% 33% 0.854

Plots and Reporting

# Create a table showing the results for each question
question_position_summary <- data.frame(
  Question = names(filtered_column_means_average),
  Average_Value = filtered_column_means_average,
  Ideal_Value = filtered_column_means_ideal,
  Reasonable_Value = filtered_column_means_reasonable,
  On_Ideal_Side = ideal_side_vector,
  On_Average_Side = average_side_vector,
  Between_Average_And_Ideal = both_sides_vector
)

# Categorize each question's position
question_position_summary$Position <- "Other"
question_position_summary$Position[question_position_summary$Between_Average_And_Ideal == 1] <- "Between Average and Ideal"
question_position_summary$Position[question_position_summary$On_Ideal_Side == 1 & 
                                  question_position_summary$On_Average_Side == 0] <- "Beyond Ideal"
question_position_summary$Position[question_position_summary$On_Ideal_Side == 0 & 
                                  question_position_summary$On_Average_Side == 1] <- "Beyond Average"

# Display the question position summary
kable(question_position_summary[, c("Question", "Average_Value", "Ideal_Value", "Reasonable_Value", "Position")], 
      caption = "Position of Reasonable Rating Relative to Average and Ideal for Each Question") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Position of Reasonable Rating Relative to Average and Ideal for Each Question
Question Average_Value Ideal_Value Reasonable_Value Position
q1 q1 4.717079 2.423138 2.000000 Beyond Ideal
q2 q2 10.120066 3.508487 0.000000 Beyond Ideal
q3 q3 4.425497 6.945092 8.000000 Beyond Ideal
q4 q4 1932.197655 1647.105983 2500.000000 Beyond Average
m m 26.377667 41.317647 120.000000 Beyond Ideal
q6 q6 27.802562 6.301167 0.000000 Beyond Ideal
q7 q7 19.770132 6.589167 20.000000 Beyond Average
q8 q8 8.612893 18.018303 100.000000 Beyond Ideal
q9 q9 5.765728 11.359191 0.000000 Beyond Average
q10 q10 5032.678623 8.432998 3.000000 Beyond Ideal
q11 q11 5050.380471 7079.111932 0.000000 Beyond Average
q12 q12 38.078602 8.936772 0.000000 Beyond Ideal
q13 q13 59.506633 20.380235 25.000000 Beyond Ideal
q14 q14 13.284132 3.547840 30.000000 Beyond Average
q15 q15 11.635323 14.586236 25.000000 Beyond Ideal
q16 q16 7.638550 7.958609 5.000000 Beyond Average
q17 q17 208.070957 1.675530 3.000000 Beyond Ideal
q18 q18 15.521523 9.793333 12.000000 Beyond Ideal
q20 q20 26.018242 5.932231 0.000000 Beyond Ideal
q21 q21 10.178713 8.181063 0.000000 Beyond Ideal
q22 q22 13.893401 10.539496 1.000000 Beyond Ideal
q23 q23 4.134615 4.432161 1.000000 Beyond Average
q24 q24 46.867638 47.390252 5.000000 Beyond Average
q25 q25 8980.698978 2641.582131 0.000000 Beyond Ideal
q26 q26 12.584312 4.116415 3.000000 Beyond Ideal
q27 q27 28.446488 8.343697 0.000000 Beyond Ideal
q28 q28 15.162038 26.797274 22.767883 Beyond Ideal
q29 q29 54.365320 72.998331 67.816395 Beyond Ideal
q30 q30 32.157095 4.830236 7.575977 Beyond Ideal
q31 q31 160.200000 58.560861 136.542155 Beyond Average
q32 q32 44.029362 55.881313 44.316547 Beyond Average
q33 q33 7.452260 3.735182 5.003173 Beyond Ideal
q34 q34 53.374080 24.978115 41.688666 Beyond Average
# Count the number of questions in each position category
position_counts <- table(question_position_summary$Position)

# Create a data frame for the position summary
position_summary <- data.frame(
  Position = names(position_counts),
  Count = as.numeric(position_counts)
)

# Calculate the percentages manually to ensure they're numeric
position_summary$Percentage <- (position_summary$Count / sum(position_summary$Count)) * 100

# Round the percentages to one decimal place
position_summary$Percentage <- round(position_summary$Percentage, 1)

# Display the position count summary
kable(position_summary, 
      caption = "Distribution of Position Categories") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Distribution of Position Categories
Position Count Percentage
Beyond Average 11 33.3
Beyond Ideal 22 66.7
# Create a visualization showing the distribution of positions
ggplot(position_summary, aes(x = Position, y = Count, fill = Position)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = paste0(Count, " (", Percentage, "%)")), 
            vjust = -0.5, size = 4) +
  labs(title = "Distribution of Position Categories",
       subtitle = "Where does 'Reasonable' fall relative to 'Average' and 'Ideal'?",
       x = "Position Category",
       y = "Number of Questions") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    legend.position = "none"
  )

# Save the position distribution plot
ggsave("position_distribution.png", width = 10, height = 6, dpi = 300)

# Create a triangle visualization to show the relationships between the ratings
# Prepare data for the triangle visualization
triangle_data <- question_position_summary

# Create a function to normalize values to 0-1 scale for better visualization
normalize <- function(x) {
  (x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}

# Create a scatterplot showing the relative positions
ggplot(triangle_data, aes(x = Average_Value, y = Ideal_Value)) +
  # Add a diagonal reference line where average = ideal
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", alpha = 0.5) +
  # Add points for each question's average and ideal values
  geom_point(color = "gray60", size = 3, alpha = 0.6) +
  # Add points for reasonable values, colored by position category
  geom_point(aes(x = Average_Value, y = Reasonable_Value, color = Position), size = 4) +
  # Connect reasonable to average with a line
  geom_segment(aes(xend = Average_Value, yend = Reasonable_Value, color = Position), 
               linetype = "dotted", size = 0.5) +
  # geom_text(aes(label = Question), hjust = -0.2, vjust = 0, size = 3) +
  scale_color_brewer(palette = "Set1") +
  labs(title = "Intermediacy Analysis: Position of Reasonable Relative to Average and Ideal",
       subtitle = paste0("Questions with Reasonable between Average and Ideal: ", 
                         count_both_sides, " of ", length(question_cols), 
                         " (", scales::percent(count_both_sides/length(question_cols)), ")"),
       x = "Average Rating",
       y = "Rating Value",
       color = "Position Category") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    legend.position = "bottom"
  )

# Save the intermediacy plot
ggsave("intermediacy_analysis.png", width = 10, height = 8, dpi = 300)

# Create a more detailed plot showing all three ratings for each question
# Convert to long format for plotting
ratings_long <- triangle_data %>%
  select(Question, Average_Value, Ideal_Value, Reasonable_Value, Position) %>%
  pivot_longer(cols = c(Average_Value, Ideal_Value, Reasonable_Value),
               names_to = "Rating_Type",
               values_to = "Value") %>%
  mutate(Rating_Type = gsub("_Value", "", Rating_Type))

# Plot the ratings for each question, grouped and colored
ggplot(ratings_long, aes(x = Rating_Type, y = Value, group = Question, color = Position)) +
  geom_line(alpha = 0.6) +
  geom_point(size = 3) +
  facet_wrap(~ Position, scales = "free_y") +
  scale_color_brewer(palette = "Set1") +
  labs(title = "Rating Patterns by Position Category",
       subtitle = "How Average, Ideal, and Reasonable ratings relate within each position category",
       x = "Rating Type",
       y = "Value") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    legend.position = "none"
  )

# Save the ratings pattern plot
ggsave("rating_patterns_by_position.png", width = 12, height = 8, dpi = 300)

# Store all the results in a structured list
analysis_2_results <- list(
  ideal_side = list(
    vector = ideal_side_vector,
    log_vector = ideal_side_vector_log,
    count = count_ideal_side,
    log_count = count_ideal_side_log,
    binomial_test = binomial_result_ideal,
    log_binomial_test = binomial_result_ideal_log,
    summary = ideal_side_summary
  ),
  average_side = list(
    vector = average_side_vector,
    log_vector = average_side_vector_log,
    count = count_average_side,
    log_count = count_average_side_log,
    binomial_test = binomial_result_avg_side,
    log_binomial_test = binomial_result_avg_side_log,
    summary = average_side_summary
  ),
  both_sides = list(
    vector = both_sides_vector,
    log_vector = both_sides_vector_log,
    count = count_both_sides,
    log_count = count_both_sides_log,
    binomial_test = binomial_result_both_sides,
    log_binomial_test = binomial_result_both_sides_log,
    summary = both_sides_summary
  ),
  question_position = question_position_summary,
  position_summary = position_summary
)

Analysis 3: Overall results, with median ratings

Exclude participants whose answer to Q19 does not equal 15 [comprehension/attention question]. [THIS HAS BEEN COMPLETED IN ANALYSIS 1]

Compute the median rating

# Calculate medians directly from the attention-check filtered data
column_medians_average <- sapply(Data_All_Average_Pass1[, question_cols], median, na.rm = TRUE)
column_medians_ideal <- sapply(Data_All_Ideal_Pass1[, question_cols], median, na.rm = TRUE)
column_medians_reasonable <- sapply(Data_All_Reasonable_Pass1[, question_cols], median, na.rm = TRUE)

# Create a dataframe for the medians
medians_df <- data.frame(
  Question = names(column_medians_average),
  Average = column_medians_average,
  Ideal = column_medians_ideal,
  Reasonable = column_medians_reasonable
)

# Display the medians for each question
kable(medians_df, caption = "Median Values for Each Question by Condition") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Median Values for Each Question by Condition
Question Average Ideal Reasonable
q1 q1 3.0 2.0 2
q2 q2 7.0 2.0 0
q3 q3 3.0 6.0 8
q4 q4 2000.0 2000.0 2500
m m 20.0 30.0 120
q6 q6 10.0 1.0 0
q7 q7 15.0 5.0 20
q8 q8 5.0 12.0 100
q9 q9 5.0 3.0 0
q10 q10 5.0 0.0 3
q11 q11 500.0 0.0 0
q12 q12 30.0 1.0 0
q13 q13 40.0 10.0 25
q14 q14 10.0 2.0 30
q15 q15 9.0 10.0 25
q16 q16 5.0 5.0 5
q17 q17 3.0 0.0 3
q18 q18 15.0 2.0 12
q20 q20 20.0 0.0 0
q21 q21 8.0 5.0 0
q22 q22 10.0 7.0 1
q23 q23 2.0 3.0 1
q24 q24 16.0 24.0 5
q25 q25 2000.0 500.0 0
q26 q26 8.0 2.0 3
q27 q27 12.0 5.0 0
q28 q28 10.0 20.0 20
q29 q29 55.0 90.0 80
q30 q30 12.0 4.0 4
q31 q31 50.0 13.0 25
q32 q32 24.0 48.0 40
q33 q33 5.5 2.5 4
q34 q34 50.0 10.0 50
# Create a comparison table of medians for plotting
medians_long <- medians_df %>%
  pivot_longer(cols = c(Average, Ideal, Reasonable), 
               names_to = "Condition", 
               values_to = "Median")

Convert median responses to a natural log scale

# Check if we have any zeros in medians before taking logs
print("Checking for zeros in medians:")
## [1] "Checking for zeros in medians:"
print("Zeros in average medians:")
## [1] "Zeros in average medians:"
print(names(column_medians_average[column_medians_average == 0]))
## character(0)
print("Zeros in ideal medians:")
## [1] "Zeros in ideal medians:"
print(names(column_medians_ideal[column_medians_ideal == 0]))
## [1] "q10" "q11" "q17" "q20"
print("Zeros in reasonable medians:")
## [1] "Zeros in reasonable medians:"
print(names(column_medians_reasonable[column_medians_reasonable == 0]))
## [1] "q2"  "q6"  "q9"  "q11" "q12" "q20" "q21" "q25" "q27"
# Apply the same safe_log function used in Analysis 1
log_filtered_column_medians_average <- safe_log(column_medians_average)
log_filtered_column_medians_ideal <- safe_log(column_medians_ideal)
log_filtered_column_medians_reasonable <- safe_log(column_medians_reasonable)

# Create a dataframe for the log medians
log_medians_df <- data.frame(
  Question = names(log_filtered_column_medians_average),
  Average = log_filtered_column_medians_average,
  Ideal = log_filtered_column_medians_ideal,
  Reasonable = log_filtered_column_medians_reasonable
)

# Display the log medians
kable(log_medians_df, caption = "Log-Transformed Median Values for Each Question by Condition") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Log-Transformed Median Values for Each Question by Condition
Question Average Ideal Reasonable
q1 q1 1.0986123 0.6931472 0.6931472
q2 q2 1.9459101 0.6931472 -4.6051702
q3 q3 1.0986123 1.7917595 2.0794415
q4 q4 7.6009025 7.6009025 7.8240460
m m 2.9957323 3.4011974 4.7874917
q6 q6 2.3025851 0.0000000 -4.6051702
q7 q7 2.7080502 1.6094379 2.9957323
q8 q8 1.6094379 2.4849066 4.6051702
q9 q9 1.6094379 1.0986123 -4.6051702
q10 q10 1.6094379 -4.6051702 1.0986123
q11 q11 6.2146081 -4.6051702 -4.6051702
q12 q12 3.4011974 0.0000000 -4.6051702
q13 q13 3.6888795 2.3025851 3.2188758
q14 q14 2.3025851 0.6931472 3.4011974
q15 q15 2.1972246 2.3025851 3.2188758
q16 q16 1.6094379 1.6094379 1.6094379
q17 q17 1.0986123 -4.6051702 1.0986123
q18 q18 2.7080502 0.6931472 2.4849066
q20 q20 2.9957323 -4.6051702 -4.6051702
q21 q21 2.0794415 1.6094379 -4.6051702
q22 q22 2.3025851 1.9459101 0.0000000
q23 q23 0.6931472 1.0986123 0.0000000
q24 q24 2.7725887 3.1780538 1.6094379
q25 q25 7.6009025 6.2146081 -4.6051702
q26 q26 2.0794415 0.6931472 1.0986123
q27 q27 2.4849066 1.6094379 -4.6051702
q28 q28 2.3025851 2.9957323 2.9957323
q29 q29 4.0073332 4.4998097 4.3820266
q30 q30 2.4849066 1.3862944 1.3862944
q31 q31 3.9120230 2.5649494 3.2188758
q32 q32 3.1780538 3.8712010 3.6888795
q33 q33 1.7047481 0.9162907 1.3862944
q34 q34 3.9120230 2.3025851 3.9120230

Conduct three regressions and record AIC

# Create data frame for regression
dataforAICMedians <- data.frame(
  Reasonable = log_filtered_column_medians_reasonable,
  Average = log_filtered_column_medians_average,
  Ideal = log_filtered_column_medians_ideal
)

# Model I: Median Average predicts Median Reasonable
Model1Medians <- lm(Reasonable ~ Average, data = dataforAICMedians)
AIC1Medians <- AIC(Model1Medians)
r2_model1_median <- summary(Model1Medians)$r.squared

# Model II: Median Ideal predicts Median Reasonable
Model2Medians <- lm(Reasonable ~ Ideal, data = dataforAICMedians)
AIC2Medians <- AIC(Model2Medians)
r2_model2_median <- summary(Model2Medians)$r.squared

# Model III: Both Median Average and Median Ideal predict Median Reasonable
Model3Medians <- lm(Reasonable ~ Average + Ideal, data = dataforAICMedians)
AIC3Medians <- AIC(Model3Medians)
r2_model3_median <- summary(Model3Medians)$r.squared

# Create a regression summary dataframe
regression_summary_medians <- data.frame(
  Model = c("Average predicts Reasonable", 
            "Ideal predicts Reasonable", 
            "Average + Ideal predict Reasonable"),
  AIC = c(AIC1Medians, AIC2Medians, AIC3Medians),
  R_squared = c(r2_model1_median, r2_model2_median, r2_model3_median)
)

# Display the regression summary table
kable(regression_summary_medians, caption = "Regression Model Comparison (Median Values)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Regression Model Comparison (Median Values)
Model AIC R_squared
Average predicts Reasonable 183.1287 0.0014857
Ideal predicts Reasonable 176.1116 0.1927536
Average + Ideal predict Reasonable 177.2436 0.2137099

Plots and Reporting

# Average Median vs Reasonable Median
plot1_median <- ggplot(dataforAICMedians, aes(x = Average, y = Reasonable)) +
  geom_point(size = 3, alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, color = "blue") +
  labs(title = "Model 1: Median Average Predicts Median Reasonable",
       subtitle = paste("AIC =", round(AIC1Medians, 2), ", R² =", round(r2_model1_median, 3)),
       x = "Log Median Average Rating",
       y = "Log Median Reasonable Rating") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )

# Ideal Median vs Reasonable Median
plot2_median <- ggplot(dataforAICMedians, aes(x = Ideal, y = Reasonable)) +
  geom_point(size = 3, alpha = 0.7) +
  geom_smooth(method = "lm", se = TRUE, color = "red") +
  labs(title = "Model 2: Median Ideal Predicts Median Reasonable",
       subtitle = paste("AIC =", round(AIC2Medians, 2), ", R² =", round(r2_model2_median, 3)),
       x = "Log Median Ideal Rating",
       y = "Log Median Reasonable Rating") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )

# Create Model 3 visualizations for median-based analysis
# Model 3 visualization (effect of average, holding ideal constant)
new_data_median <- expand.grid(
  Average = seq(min(dataforAICMedians$Average), max(dataforAICMedians$Average), length.out = 100),
  Ideal = median(dataforAICMedians$Ideal)  # Hold Ideal constant at median
)
new_data_median$predicted_reasonable <- predict(Model3Medians, newdata = new_data_median)

plot3_median <- ggplot(dataforAICMedians, aes(x = Average, y = Reasonable)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_line(data = new_data_median, aes(y = predicted_reasonable), color = "blue", size = 1) +
  labs(title = "Model 3: Effect of Median Average (Holding Median Ideal at Median)",
       subtitle = paste("AIC =", round(AIC3Medians, 2), ", R² =", round(r2_model3_median, 3)),
       x = "Log Median Average Rating",
       y = "Log Median Reasonable Rating") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )

# Model 3 visualization (effect of ideal, holding average constant)
new_data2_median <- expand.grid(
  Average = median(dataforAICMedians$Average),  # Hold Average constant at median
  Ideal = seq(min(dataforAICMedians$Ideal), max(dataforAICMedians$Ideal), length.out = 100)
)
new_data2_median$predicted_reasonable <- predict(Model3Medians, newdata = new_data2_median)

plot4_median <- ggplot(dataforAICMedians, aes(x = Ideal, y = Reasonable)) +
  geom_point(size = 3, alpha = 0.5) +
  geom_line(data = new_data2_median, aes(y = predicted_reasonable), color = "red", size = 1) +
  labs(title = "Model 3: Effect of Median Ideal (Holding Median Average at Median)",
       subtitle = paste("AIC =", round(AIC3Medians, 2), ", R² =", round(r2_model3_median, 3)),
       x = "Log Median Ideal Rating",
       y = "Log Median Reasonable Rating") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10)
  )

# Display plots
print(plot1_median)
## `geom_smooth()` using formula = 'y ~ x'

print(plot2_median)
## `geom_smooth()` using formula = 'y ~ x'

print(plot3_median)

print(plot4_median)

# Save plots
ggsave("model1_median_average_reasonable.png", plot1_median, width = 8, height = 6, dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
ggsave("model2_median_ideal_reasonable.png", plot2_median, width = 8, height = 6, dpi = 300)
## `geom_smooth()` using formula = 'y ~ x'
ggsave("model3_median_average_effect.png", plot3_median, width = 8, height = 6, dpi = 300)
ggsave("model3_median_ideal_effect.png", plot4_median, width = 8, height = 6, dpi = 300)

# Create a combined plot layout
grid_plots_median <- grid.arrange(plot1_median, plot2_median, plot3_median, plot4_median, ncol = 2)
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

ggsave("all_median_regression_plots.png", grid_plots_median, width = 14, height = 10, dpi = 300)

# Create a correlation matrix for median data
cor_matrix_median <- cor(dataforAICMedians, use = "complete.obs")
corrplot(cor_matrix_median, method = "color", 
        type = "upper", 
        addCoef.col = "black",
        tl.col = "black",
        tl.srt = 45,
        diag = FALSE,
        title = "Correlation Matrix (Median Values)",
        mar = c(0,0,1,0))

# Create detailed coefficient tables
model1_coefs_median <- get_model_coefs(Model1Medians)
model2_coefs_median <- get_model_coefs(Model2Medians)
model3_coefs_median <- get_model_coefs(Model3Medians)

# Format p-values
model1_coefs_median$p_value <- sapply(model1_coefs_median$p_value, format_pvalue)
model2_coefs_median$p_value <- sapply(model2_coefs_median$p_value, format_pvalue)
model3_coefs_median$p_value <- sapply(model3_coefs_median$p_value, format_pvalue)

# Display coefficient tables
kable(model1_coefs_median, caption = "Model 1 Coefficients for Median Values (Average → Reasonable)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Model 1 Coefficients for Median Values (Average → Reasonable)
Term Estimate Std_Error t_value p_value
(Intercept) (Intercept) 0.4106407 1.2707513 0.3231480 0.749
Average Average 0.0844553 0.3932404 0.2147677 0.831
kable(model2_coefs_median, caption = "Model 2 Coefficients for Median Values (Ideal → Reasonable)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Model 2 Coefficients for Median Values (Ideal → Reasonable)
Term Estimate Std_Error t_value p_value
(Intercept) (Intercept) -0.1073312 0.6357038 -0.1688384 0.867
Ideal Ideal 0.5729619 0.2105944 2.7206895 0.011 *
kable(model3_coefs_median, caption = "Model 3 Coefficients for Median Values (Average + Ideal → Reasonable)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Model 3 Coefficients for Median Values (Average + Ideal → Reasonable)
Term Estimate Std_Error t_value p_value
(Intercept) (Intercept) 0.7510611 1.1525172 0.6516702 0.52
Average Average -0.3446637 0.3854503 -0.8941845 0.378
Ideal Ideal 0.6532777 0.2295785 2.8455532 0.008 **
# Print model summaries
cat("\nModel 1 Median Analysis (Average predicts Reasonable):\n")
## 
## Model 1 Median Analysis (Average predicts Reasonable):
print(summary(Model1Medians))
## 
## Call:
## lm(formula = Reasonable ~ Average, data = dataforAICMedians)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -5.6577 -5.1517  0.8317  2.4967  6.7715 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)
## (Intercept)  0.41064    1.27075   0.323    0.749
## Average      0.08446    0.39324   0.215    0.831
## 
## Residual standard error: 3.655 on 31 degrees of freedom
## Multiple R-squared:  0.001486,   Adjusted R-squared:  -0.03072 
## F-statistic: 0.04613 on 1 and 31 DF,  p-value: 0.8314
cat("\nModel 2 Median Analysis (Ideal predicts Reasonable):\n")
## 
## Model 2 Median Analysis (Ideal predicts Reasonable):
print(summary(Model2Medians))
## 
## Call:
## lm(formula = Reasonable ~ Ideal, data = dataforAICMedians)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -8.0586 -1.8593  0.9686  2.1809  3.8445 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)  
## (Intercept)  -0.1073     0.6357  -0.169   0.8670  
## Ideal         0.5730     0.2106   2.721   0.0106 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.286 on 31 degrees of freedom
## Multiple R-squared:  0.1928, Adjusted R-squared:  0.1667 
## F-statistic: 7.402 on 1 and 31 DF,  p-value: 0.01059
cat("\nModel 3 Median Analysis (Average and Ideal predict Reasonable):\n")
## 
## Model 3 Median Analysis (Average and Ideal predict Reasonable):
print(summary(Model3Medians))
## 
## Call:
## lm(formula = Reasonable ~ Average + Ideal, data = dataforAICMedians)
## 
## Residuals:
##     Min      1Q  Median      3Q     Max 
## -6.7963 -1.2299  0.6114  2.2144  4.7272 
## 
## Coefficients:
##             Estimate Std. Error t value Pr(>|t|)   
## (Intercept)   0.7511     1.1525   0.652  0.51958   
## Average      -0.3447     0.3855  -0.894  0.37834   
## Ideal         0.6533     0.2296   2.846  0.00792 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 3.297 on 30 degrees of freedom
## Multiple R-squared:  0.2137, Adjusted R-squared:  0.1613 
## F-statistic: 4.077 on 2 and 30 DF,  p-value: 0.02715
# Compare mean and median regressions
comparison_df <- data.frame(
  Model = c("Average predicts Reasonable", "Ideal predicts Reasonable", "Average + Ideal predict Reasonable"),
  Mean_Analysis_AIC = c(AIC1Means, AIC2Means, AIC3Means),
  Mean_Analysis_R2 = c(r2_model1, r2_model2, r2_model3),
  Median_Analysis_AIC = c(AIC1Medians, AIC2Medians, AIC3Medians),
  Median_Analysis_R2 = c(r2_model1_median, r2_model2_median, r2_model3_median)
)

kable(comparison_df, caption = "Comparison of Mean-Based and Median-Based Analyses") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Comparison of Mean-Based and Median-Based Analyses
Model Mean_Analysis_AIC Mean_Analysis_R2 Median_Analysis_AIC Median_Analysis_R2
Average predicts Reasonable 184.0899 0.0010386 183.1287 0.0014857
Ideal predicts Reasonable 183.9865 0.0041640 176.1116 0.1927536
Average + Ideal predict Reasonable 185.5350 0.0176975 177.2436 0.2137099
# Store all the results in a structured list
analysis_3_results <- list(
  medians_by_question = medians_df,
  log_medians_by_question = log_medians_df,
  regression_models = list(
    model1 = Model1Medians,
    model2 = Model2Medians,
    model3 = Model3Medians
  ),
  regression_summary = regression_summary_medians,
  aic_values = c(AIC1 = AIC1Medians, AIC2 = AIC2Medians, AIC3 = AIC3Medians),
  comparison_with_means = comparison_df
)

Analysis 4: Overall results, intermediacy [following Bear & Knobe 2016] with median ratings

Determine if each median reasonableness rating is on the “ideal side” of average

# Function to check if reasonable is on the "ideal side" of average
# Equal median ratings are treated as being on the predicted side
is_ideal_side_median <- function(average, ideal, reasonable) {
  ifelse(abs(reasonable - ideal) <= abs(reasonable - average), 1, 0)
}

# Apply the function to the medians
ideal_side_vector_median <- mapply(is_ideal_side_median,
                                  column_medians_average,
                                  column_medians_ideal,
                                  column_medians_reasonable)

# Count how many questions have reasonableness on the "ideal side" of average
count_ideal_side_median <- sum(ideal_side_vector_median)

# Conduct binomial test to assess whether the proportion is greater than 50%
binomial_result_ideal_median <- binom.test(count_ideal_side_median, length(question_cols), p = 0.5, 
                                         alternative = "greater")

# Create a summary table for the binomial test results
ideal_side_summary_median <- data.frame(
  On_Ideal_Side_Count = count_ideal_side_median,
  Total_Questions = length(question_cols),
  Proportion = count_ideal_side_median/length(question_cols),
  p_value = binomial_result_ideal_median$p.value
)

# Format proportion and p-value for display
ideal_side_summary_median$Proportion <- scales::percent(ideal_side_summary_median$Proportion)
ideal_side_summary_median$p_value <- format_pvalue(ideal_side_summary_median$p_value)

# Display the binomial test results table
kable(ideal_side_summary_median, 
      caption = "Binomial Test Results: Is Median Reasonableness on the Ideal Side of Average?") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Binomial Test Results: Is Median Reasonableness on the Ideal Side of Average?
On_Ideal_Side_Count Total_Questions Proportion p_value
25 33 76% 0.002 **

Determine if each median reasonableness rating is on the “ideal side” of average

# Function to check if reasonable is on the "average side" of ideal
is_average_side_median <- function(average, ideal, reasonable) {
  ifelse(abs(reasonable - average) <= abs(reasonable - ideal), 1, 0)
}

# Apply the function to the medians
average_side_vector_median <- mapply(is_average_side_median,
                                    column_medians_average,
                                    column_medians_ideal,
                                    column_medians_reasonable)

# Count how many questions have reasonableness on the "average side" of ideal
count_average_side_median <- sum(average_side_vector_median)

# Conduct binomial test to assess whether the proportion is greater than 50%
binomial_result_avg_side_median <- binom.test(count_average_side_median, length(question_cols), p = 0.5, 
                                            alternative = "greater")

# Create a summary table for the binomial test results
average_side_summary_median <- data.frame(
  On_Average_Side_Count = count_average_side_median,
  Total_Questions = length(question_cols),
  Proportion = count_average_side_median/length(question_cols),
  p_value = binomial_result_avg_side_median$p.value
)

# Format proportion and p-value for display
average_side_summary_median$Proportion <- scales::percent(average_side_summary_median$Proportion)
average_side_summary_median$p_value <- format_pvalue(average_side_summary_median$p_value)

# Display the binomial test results table
kable(average_side_summary_median, 
      caption = "Binomial Test Results: Is Median Reasonableness on the Average Side of Ideal?") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Binomial Test Results: Is Median Reasonableness on the Average Side of Ideal?
On_Average_Side_Count Total_Questions Proportion p_value
12 33 36% 0.96

Determine if median reasonableness is both on the ideal side of average and on the average side of ideal

# Function to check if reasonable falls between average and ideal
# Using the same approach as Analysis 2
is_both_sides_median <- function(average, ideal, reasonable) {
  ifelse((reasonable - average) * (reasonable - ideal) <= 0, 1, 0)
}

# Apply the function to the medians
both_sides_vector_median <- mapply(is_both_sides_median,
                                  column_medians_average,
                                  column_medians_ideal,
                                  column_medians_reasonable)

# Count how many questions have reasonableness on both sides
count_both_sides_median <- sum(both_sides_vector_median)

# Conduct binomial test with 1/3 as the chance rate
binomial_result_both_sides_median <- binom.test(count_both_sides_median, length(question_cols), p = 1/3, 
                                              alternative = "two.sided")

# Create a summary table for the binomial test results
both_sides_summary_median <- data.frame(
  On_Both_Sides_Count = count_both_sides_median,
  Total_Questions = length(question_cols),
  Proportion = count_both_sides_median/length(question_cols),
  Expected_Chance = 1/3,
  p_value = binomial_result_both_sides_median$p.value
)

# Format proportions and p-values for display
both_sides_summary_median$Proportion <- scales::percent(both_sides_summary_median$Proportion)
both_sides_summary_median$Expected_Chance <- scales::percent(both_sides_summary_median$Expected_Chance)
both_sides_summary_median$p_value <- format_pvalue(both_sides_summary_median$p_value)

# Display the binomial test results table
kable(both_sides_summary_median, 
      caption = "Binomial Test Results: Is Median Reasonableness Between Average and Ideal?") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Binomial Test Results: Is Median Reasonableness Between Average and Ideal?
On_Both_Sides_Count Total_Questions Proportion Expected_Chance p_value
16 33 48% 33% 0.094

Plots and Reporting

# Create a table showing the results for each question
question_position_summary_median <- data.frame(
  Question = names(column_medians_average),
  Average_Value = column_medians_average,
  Ideal_Value = column_medians_ideal,
  Reasonable_Value = column_medians_reasonable,
  On_Ideal_Side = ideal_side_vector_median,
  On_Average_Side = average_side_vector_median,
  Between_Average_And_Ideal = both_sides_vector_median
)

# Categorize each question's position
question_position_summary_median$Position <- "Other"
question_position_summary_median$Position[question_position_summary_median$Between_Average_And_Ideal == 1] <- "Between Average and Ideal"
question_position_summary_median$Position[question_position_summary_median$On_Ideal_Side == 1 & 
                                        question_position_summary_median$On_Average_Side == 0] <- "Beyond Ideal"
question_position_summary_median$Position[question_position_summary_median$On_Ideal_Side == 0 & 
                                        question_position_summary_median$On_Average_Side == 1] <- "Beyond Average"

# Display the question position summary
kable(question_position_summary_median[, c("Question", "Average_Value", "Ideal_Value", "Reasonable_Value", "Position")], 
      caption = "Position of Median Reasonable Rating Relative to Average and Ideal for Each Question") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Position of Median Reasonable Rating Relative to Average and Ideal for Each Question
Question Average_Value Ideal_Value Reasonable_Value Position
q1 q1 3.0 2.0 2 Beyond Ideal
q2 q2 7.0 2.0 0 Beyond Ideal
q3 q3 3.0 6.0 8 Beyond Ideal
q4 q4 2000.0 2000.0 2500 Other
m m 20.0 30.0 120 Beyond Ideal
q6 q6 10.0 1.0 0 Beyond Ideal
q7 q7 15.0 5.0 20 Beyond Average
q8 q8 5.0 12.0 100 Beyond Ideal
q9 q9 5.0 3.0 0 Beyond Ideal
q10 q10 5.0 0.0 3 Beyond Average
q11 q11 500.0 0.0 0 Beyond Ideal
q12 q12 30.0 1.0 0 Beyond Ideal
q13 q13 40.0 10.0 25 Between Average and Ideal
q14 q14 10.0 2.0 30 Beyond Average
q15 q15 9.0 10.0 25 Beyond Ideal
q16 q16 5.0 5.0 5 Between Average and Ideal
q17 q17 3.0 0.0 3 Beyond Average
q18 q18 15.0 2.0 12 Beyond Average
q20 q20 20.0 0.0 0 Beyond Ideal
q21 q21 8.0 5.0 0 Beyond Ideal
q22 q22 10.0 7.0 1 Beyond Ideal
q23 q23 2.0 3.0 1 Beyond Average
q24 q24 16.0 24.0 5 Beyond Average
q25 q25 2000.0 500.0 0 Beyond Ideal
q26 q26 8.0 2.0 3 Beyond Ideal
q27 q27 12.0 5.0 0 Beyond Ideal
q28 q28 10.0 20.0 20 Beyond Ideal
q29 q29 55.0 90.0 80 Beyond Ideal
q30 q30 12.0 4.0 4 Beyond Ideal
q31 q31 50.0 13.0 25 Beyond Ideal
q32 q32 24.0 48.0 40 Beyond Ideal
q33 q33 5.5 2.5 4 Between Average and Ideal
q34 q34 50.0 10.0 50 Beyond Average
# Count the number of questions in each position category
position_counts_median <- table(question_position_summary_median$Position)

# Create a data frame for the position summary
position_summary_median <- data.frame(
  Position = names(position_counts_median),
  Count = as.numeric(position_counts_median)
)

# Calculate the percentages manually to ensure they're numeric
position_summary_median$Percentage <- (position_summary_median$Count / sum(position_summary_median$Count)) * 100

# Round the percentages to one decimal place
position_summary_median$Percentage <- round(position_summary_median$Percentage, 1)

# Display the position count summary
kable(position_summary_median, 
      caption = "Distribution of Position Categories (Median Values)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Distribution of Position Categories (Median Values)
Position Count Percentage
Between Average and Ideal 3 9.1
Beyond Average 8 24.2
Beyond Ideal 21 63.6
Other 1 3.0
# Create a visualization showing the distribution of positions
position_distribution_plot_median <- ggplot(position_summary_median, aes(x = Position, y = Count, fill = Position)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = paste0(Count, " (", Percentage, "%)")), 
            vjust = -0.5, size = 4) +
  labs(title = "Distribution of Position Categories (Median Values)",
       subtitle = "Where does 'Reasonable' fall relative to 'Average' and 'Ideal'?",
       x = "Position Category",
       y = "Number of Questions") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    legend.position = "none"
  )

# Display the plot
print(position_distribution_plot_median)

# Save the position distribution plot
ggsave("position_distribution_median.png", position_distribution_plot_median, width = 10, height = 6, dpi = 300)

# Create a scatterplot showing the relative positions
intermediacy_plot_median <- ggplot(question_position_summary_median, aes(x = Average_Value, y = Ideal_Value)) +
  # Add a diagonal reference line where average = ideal
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", alpha = 0.5) +
  # Add points for each question's average and ideal values
  geom_point(color = "gray60", size = 3, alpha = 0.6) +
  # Add points for reasonable values, colored by position category
  geom_point(aes(x = Average_Value, y = Reasonable_Value, color = Position), size = 4) +
  # Connect reasonable to average with a line
  geom_segment(aes(xend = Average_Value, yend = Reasonable_Value, color = Position), 
               linetype = "dotted", size = 0.5) +
  scale_color_brewer(palette = "Set1") +
  labs(title = "Intermediacy Analysis: Position of Median Reasonable Relative to Average and Ideal",
       subtitle = paste0("Questions with Reasonable between Average and Ideal: ", 
                         count_both_sides_median, " of ", length(question_cols), 
                         " (", scales::percent(count_both_sides_median/length(question_cols)), ")"),
       x = "Median Average Rating",
       y = "Rating Value",
       color = "Position Category") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    legend.position = "bottom"
  )

# Display the plot
print(intermediacy_plot_median)

# Save the intermediacy plot
ggsave("intermediacy_analysis_median.png", intermediacy_plot_median, width = 10, height = 8, dpi = 300)

# Create a more detailed plot showing all three ratings for each question
# Convert to long format for plotting
ratings_long_median <- question_position_summary_median %>%
  select(Question, Average_Value, Ideal_Value, Reasonable_Value, Position) %>%
  pivot_longer(cols = c(Average_Value, Ideal_Value, Reasonable_Value),
               names_to = "Rating_Type",
               values_to = "Value") %>%
  mutate(Rating_Type = gsub("_Value", "", Rating_Type))

# Plot the ratings for each question, grouped and colored
rating_patterns_plot_median <- ggplot(ratings_long_median, aes(x = Rating_Type, y = Value, group = Question, color = Position)) +
  geom_line(alpha = 0.6) +
  geom_point(size = 3) +
  facet_wrap(~ Position, scales = "free_y") +
  scale_color_brewer(palette = "Set1") +
  labs(title = "Rating Patterns by Position Category (Median Values)",
       subtitle = "How Average, Ideal, and Reasonable median ratings relate within each position category",
       x = "Rating Type",
       y = "Value") +
  theme_minimal() +
  theme(
    plot.title = element_text(size = 14, face = "bold"),
    plot.subtitle = element_text(size = 12),
    axis.title = element_text(size = 12),
    axis.text = element_text(size = 10),
    legend.position = "none"
  )

# Display the plot
print(rating_patterns_plot_median)

# Save the ratings pattern plot
ggsave("rating_patterns_by_position_median.png", rating_patterns_plot_median, width = 12, height = 8, dpi = 300)

# Compare the results between mean-based and median-based intermediacy analyses
comparison_intermediacy <- data.frame(
  Analysis_Type = c("Mean-based Analysis", "Median-based Analysis"),
  On_Ideal_Side_Percentage = c(
    paste0(count_ideal_side, "/", length(question_cols), " (", scales::percent(count_ideal_side/length(question_cols)), ")"),
    paste0(count_ideal_side_median, "/", length(question_cols), " (", scales::percent(count_ideal_side_median/length(question_cols)), ")")
  ),
  On_Average_Side_Percentage = c(
    paste0(count_average_side, "/", length(question_cols), " (", scales::percent(count_average_side/length(question_cols)), ")"),
    paste0(count_average_side_median, "/", length(question_cols), " (", scales::percent(count_average_side_median/length(question_cols)), ")")
  ),
  Between_Average_And_Ideal_Percentage = c(
    paste0(count_both_sides, "/", length(question_cols), " (", scales::percent(count_both_sides/length(question_cols)), ")"),
    paste0(count_both_sides_median, "/", length(question_cols), " (", scales::percent(count_both_sides_median/length(question_cols)), ")")
  )
)

# Display the comparison table
kable(comparison_intermediacy, caption = "Comparison of Mean-based and Median-based Intermediacy Analyses") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F)
Comparison of Mean-based and Median-based Intermediacy Analyses
Analysis_Type On_Ideal_Side_Percentage On_Average_Side_Percentage Between_Average_And_Ideal_Percentage
Mean-based Analysis 22/33 (67%) 11/33 (33%) 10/33 (30%)
Median-based Analysis 25/33 (76%) 12/33 (36%) 16/33 (48%)
# Store all the results in a structured list
analysis_4_results <- list(
  ideal_side = list(
    vector = ideal_side_vector_median,
    count = count_ideal_side_median,
    binomial_test = binomial_result_ideal_median,
    summary = ideal_side_summary_median
  ),
  average_side = list(
    vector = average_side_vector_median,
    count = count_average_side_median,
    binomial_test = binomial_result_avg_side_median,
    summary = average_side_summary_median
  ),
  both_sides = list(
    vector = both_sides_vector_median,
    count = count_both_sides_median,
    binomial_test = binomial_result_both_sides_median,
    summary = both_sides_summary_median
  ),
  question_position = question_position_summary_median,
  position_summary = position_summary_median,
  comparison_with_means = comparison_intermediacy
)

Analysis 5: Overall results, mean ratings [following Bear & Knobe 2016] BY COUNTRY

#### Get list of unique countries in the dataset (confirming ten)
countries <- unique(Data_All$Country_name)
print(paste("Number of countries in dataset:", length(countries)))
## [1] "Number of countries in dataset: 10"
print("Countries included in analysis:")
## [1] "Countries included in analysis:"
print(countries)
##  [1] "USA"         "Colombia"    "Brazil"      "Germany"     "India"      
##  [6] "Lithuania"   "Italy"       "Poland"      "Spain"       "Netherlands"
# Create storage for country-specific results
country_results_mean <- list()
# Function to analyze data for a single country
analyze_country_mean <- function(country) {
  cat("\n========== Analyzing Mean Data for:", country, "==========\n")
  
  # Step 1: Split data by condition for this country
  country_average <- Data_All_Average %>% filter(Country_name == country)
  country_ideal <- Data_All_Ideal %>% filter(Country_name == country)
  country_reasonable <- Data_All_Reasonable %>% filter(Country_name == country)
  
  # Apply attention check filter (q19 == 15)
  country_average_pass <- country_average %>% filter(q19 == 15)
  country_ideal_pass <- country_ideal %>% filter(q19 == 15)
  country_reasonable_pass <- country_reasonable %>% filter(q19 == 15)
  
  # Print participant counts
  cat("Participants after attention check filter:\n")
  cat("Average condition:", nrow(country_average_pass), "\n")
  cat("Ideal condition:", nrow(country_ideal_pass), "\n")
  cat("Reasonable condition:", nrow(country_reasonable_pass), "\n")
  
  # Convert columns to numeric
  country_average_pass <- country_average_pass %>%
    mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
  
  country_ideal_pass <- country_ideal_pass %>%
    mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
  
  country_reasonable_pass <- country_reasonable_pass %>%
    mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
  
  # Step 2: Apply 3 SD filtering
  # For average condition
  country_avg_filter_results <- filter_data(country_average_pass, question_cols)
  country_average_filtered <- country_avg_filter_results$filtered_data
  
  # For ideal condition
  country_ideal_filter_results <- filter_data(country_ideal_pass, question_cols)
  country_ideal_filtered <- country_ideal_filter_results$filtered_data
  
  # For reasonable condition
  country_reasonable_filter_results <- filter_data(country_reasonable_pass, question_cols)
  country_reasonable_filtered <- country_reasonable_filter_results$filtered_data
  
  # Calculate means after filtering
  country_means_average <- sapply(country_average_filtered[, question_cols], mean, na.rm = TRUE)
  country_means_ideal <- sapply(country_ideal_filtered[, question_cols], mean, na.rm = TRUE)
  country_means_reasonable <- sapply(country_reasonable_filtered[, question_cols], mean, na.rm = TRUE)
  
  # Step 3: Convert to log scale
  country_log_means_average <- safe_log(country_means_average)
  country_log_means_ideal <- safe_log(country_means_ideal)
  country_log_means_reasonable <- safe_log(country_means_reasonable)
  
  # Step 4: Run regression models
  country_dataforAIC <- data.frame(
    Reasonable = country_log_means_reasonable,
    Average = country_log_means_average,
    Ideal = country_log_means_ideal
  )
  
  # Skip regression if we have too few data points
  if(nrow(country_dataforAIC) < 5) {
    cat("Too few data points for regression analysis in", country, "\n")
    return(NULL)
  }
  
  # Model I: Average predicts reasonable
  country_model1 <- lm(Reasonable ~ Average, data = country_dataforAIC)
  country_aic1 <- AIC(country_model1)
  country_r2_1 <- summary(country_model1)$r.squared
  
  # Model II: Ideal predicts reasonable
  country_model2 <- lm(Reasonable ~ Ideal, data = country_dataforAIC)
  country_aic2 <- AIC(country_model2)
  country_r2_2 <- summary(country_model2)$r.squared
  
  # Model III: Both average and ideal predict reasonable
  country_model3 <- lm(Reasonable ~ Average + Ideal, data = country_dataforAIC)
  country_aic3 <- AIC(country_model3)
  country_r2_3 <- summary(country_model3)$r.squared
  
  # Create regression summary
  country_reg_summary <- data.frame(
    Model = c("Average predicts Reasonable", 
              "Ideal predicts Reasonable", 
              "Average + Ideal predict Reasonable"),
    AIC = c(country_aic1, country_aic2, country_aic3),
    R_squared = c(country_r2_1, country_r2_2, country_r2_3)
  )
  
  # Determine which model has the lowest AIC (best fit)
  best_model_idx <- which.min(c(country_aic1, country_aic2, country_aic3))
  best_model_name <- c("Average", "Ideal", "Hybrid")[best_model_idx]
  
  cat("\nRegression Results for", country, ":\n")
  print(country_reg_summary)
  cat("\nBest model for", country, "is:", best_model_name, "\n")
  
  # Return results for this country
  return(list(
    country = country,
    sample_sizes = list(
      original = c(nrow(country_average), nrow(country_ideal), nrow(country_reasonable)),
      after_attention = c(nrow(country_average_pass), nrow(country_ideal_pass), nrow(country_reasonable_pass)),
      after_filtering = c(nrow(country_average_filtered), nrow(country_ideal_filtered), nrow(country_reasonable_filtered))
    ),
    means = list(
      average = country_means_average,
      ideal = country_means_ideal,
      reasonable = country_means_reasonable
    ),
    log_means = list(
      average = country_log_means_average,
      ideal = country_log_means_ideal,
      reasonable = country_log_means_reasonable
    ),
    regression_models = list(
      model1 = country_model1,
      model2 = country_model2,
      model3 = country_model3
    ),
    regression_summary = country_reg_summary,
    aic_values = c(Model1 = country_aic1, Model2 = country_aic2, Model3 = country_aic3),
    best_model = best_model_name
  ))
}
# Analyze data for each country
for(country in countries) {
  country_results_mean[[country]] <- analyze_country_mean(country)
}
## 
## ========== Analyzing Mean Data for: USA ==========
## Participants after attention check filter:
## Average condition: 73 
## Ideal condition: 72 
## Reasonable condition: 69 
## 
## Regression Results for USA :
##                                Model      AIC    R_squared
## 1        Average predicts Reasonable 184.5695 0.0009859579
## 2          Ideal predicts Reasonable 181.8299 0.0805742574
## 3 Average + Ideal predict Reasonable 179.3529 0.1972186736
## 
## Best model for USA is: Hybrid 
## 
## ========== Analyzing Mean Data for: Colombia ==========
## Participants after attention check filter:
## Average condition: 89 
## Ideal condition: 84 
## Reasonable condition: 81
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(all_of(question_cols), ~as.numeric(as.character(.)))`.
## Caused by warning:
## ! NAs introduced by coercion
## 
## Regression Results for Colombia :
##                                Model      AIC    R_squared
## 1        Average predicts Reasonable 184.2236 3.101717e-05
## 2          Ideal predicts Reasonable 183.1142 3.308842e-02
## 3 Average + Ideal predict Reasonable 180.3834 1.622269e-01
## 
## Best model for Colombia is: Hybrid 
## 
## ========== Analyzing Mean Data for: Brazil ==========
## Participants after attention check filter:
## Average condition: 43 
## Ideal condition: 56 
## Reasonable condition: 52 
## 
## Regression Results for Brazil :
##                                Model      AIC   R_squared
## 1        Average predicts Reasonable 183.3230 0.004453007
## 2          Ideal predicts Reasonable 183.2444 0.006819475
## 3 Average + Ideal predict Reasonable 182.4260 0.088123747
## 
## Best model for Brazil is: Hybrid 
## 
## ========== Analyzing Mean Data for: Germany ==========
## Participants after attention check filter:
## Average condition: 71 
## Ideal condition: 72 
## Reasonable condition: 72 
## 
## Regression Results for Germany :
##                                Model      AIC    R_squared
## 1        Average predicts Reasonable 183.8982 0.0001830072
## 2          Ideal predicts Reasonable 179.6743 0.1203071972
## 3 Average + Ideal predict Reasonable 180.0991 0.1613116299
## 
## Best model for Germany is: Ideal 
## 
## ========== Analyzing Mean Data for: India ==========
## Participants after attention check filter:
## Average condition: 74 
## Ideal condition: 71 
## Reasonable condition: 56 
## 
## Regression Results for India :
##                                Model      AIC   R_squared
## 1        Average predicts Reasonable 181.6445 0.003266586
## 2          Ideal predicts Reasonable 181.6813 0.002156856
## 3 Average + Ideal predict Reasonable 183.6439 0.003286259
## 
## Best model for India is: Average 
## 
## ========== Analyzing Mean Data for: Lithuania ==========
## Participants after attention check filter:
## Average condition: 76 
## Ideal condition: 76 
## Reasonable condition: 77 
## 
## Regression Results for Lithuania :
##                                Model      AIC   R_squared
## 1        Average predicts Reasonable 183.3373 0.007818933
## 2          Ideal predicts Reasonable 183.4802 0.003515395
## 3 Average + Ideal predict Reasonable 179.5624 0.167104625
## 
## Best model for Lithuania is: Hybrid 
## 
## ========== Analyzing Mean Data for: Italy ==========
## Participants after attention check filter:
## Average condition: 73 
## Ideal condition: 83 
## Reasonable condition: 75 
## 
## Regression Results for Italy :
##                                Model      AIC  R_squared
## 1        Average predicts Reasonable 183.4638 0.00105871
## 2          Ideal predicts Reasonable 181.6477 0.05454873
## 3 Average + Ideal predict Reasonable 181.2000 0.12213542
## 
## Best model for Italy is: Hybrid 
## 
## ========== Analyzing Mean Data for: Poland ==========
## Participants after attention check filter:
## Average condition: 90 
## Ideal condition: 87 
## Reasonable condition: 90
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(all_of(question_cols), ~as.numeric(as.character(.)))`.
## Caused by warning:
## ! NAs introduced by coercion
## 
## Regression Results for Poland :
##                                Model      AIC    R_squared
## 1        Average predicts Reasonable 183.4983 0.0006096071
## 2          Ideal predicts Reasonable 181.3345 0.0640355998
## 3 Average + Ideal predict Reasonable 179.8445 0.1579659186
## 
## Best model for Poland is: Hybrid 
## 
## ========== Analyzing Mean Data for: Spain ==========
## Participants after attention check filter:
## Average condition: 73 
## Ideal condition: 69 
## Reasonable condition: 72 
## 
## Regression Results for Spain :
##                                Model      AIC   R_squared
## 1        Average predicts Reasonable 182.8232 0.007816569
## 2          Ideal predicts Reasonable 177.1617 0.164234746
## 3 Average + Ideal predict Reasonable 176.0171 0.240200039
## 
## Best model for Spain is: Hybrid 
## 
## ========== Analyzing Mean Data for: Netherlands ==========
## Participants after attention check filter:
## Average condition: 131 
## Ideal condition: 130 
## Reasonable condition: 114 
## 
## Regression Results for Netherlands :
##                                Model      AIC   R_squared
## 1        Average predicts Reasonable 183.7478 0.004381741
## 2          Ideal predicts Reasonable 182.7997 0.032579809
## 3 Average + Ideal predict Reasonable 183.9397 0.057464764
## 
## Best model for Netherlands is: Ideal
# Extract AIC values and best models for all countries
country_aic_summary <- data.frame(
  Country = character(),
  AIC_Average = numeric(),
  AIC_Ideal = numeric(),
  AIC_Hybrid = numeric(),
  R2_Average = numeric(),
  R2_Ideal = numeric(),
  R2_Hybrid = numeric(),
  Best_Model = character(),
  stringsAsFactors = FALSE
)

for(country in countries) {
  if(!is.null(country_results_mean[[country]])) {
    country_aic_summary <- rbind(country_aic_summary, data.frame(
      Country = country,
      AIC_Average = country_results_mean[[country]]$aic_values["Model1"],
      AIC_Ideal = country_results_mean[[country]]$aic_values["Model2"],
      AIC_Hybrid = country_results_mean[[country]]$aic_values["Model3"],
      R2_Average = country_results_mean[[country]]$regression_summary$R_squared[1],
      R2_Ideal = country_results_mean[[country]]$regression_summary$R_squared[2],
      R2_Hybrid = country_results_mean[[country]]$regression_summary$R_squared[3],
      Best_Model = country_results_mean[[country]]$best_model,
      stringsAsFactors = FALSE
    ))
  }
}

# Display the summary table
kable(country_aic_summary, 
      caption = "AIC Values and Best Models by Country (Mean Analysis)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  column_spec(8, background = ifelse(country_aic_summary$Best_Model == "Hybrid", "#e6f7ff", 
                                   ifelse(country_aic_summary$Best_Model == "Average", "#e6ffe6", "#ffe6e6")))
AIC Values and Best Models by Country (Mean Analysis)
Country AIC_Average AIC_Ideal AIC_Hybrid R2_Average R2_Ideal R2_Hybrid Best_Model
Model1 USA 184.5695 181.8299 179.3529 0.0009860 0.0805743 0.1972187 Hybrid
Model11 Colombia 184.2236 183.1142 180.3834 0.0000310 0.0330884 0.1622269 Hybrid
Model12 Brazil 183.3230 183.2444 182.4260 0.0044530 0.0068195 0.0881237 Hybrid
Model13 Germany 183.8982 179.6743 180.0991 0.0001830 0.1203072 0.1613116 Ideal
Model14 India 181.6445 181.6813 183.6439 0.0032666 0.0021569 0.0032863 Average
Model15 Lithuania 183.3373 183.4802 179.5624 0.0078189 0.0035154 0.1671046 Hybrid
Model16 Italy 183.4638 181.6477 181.2000 0.0010587 0.0545487 0.1221354 Hybrid
Model17 Poland 183.4983 181.3345 179.8445 0.0006096 0.0640356 0.1579659 Hybrid
Model18 Spain 182.8232 177.1617 176.0171 0.0078166 0.1642347 0.2402000 Hybrid
Model19 Netherlands 183.7478 182.7997 183.9397 0.0043817 0.0325798 0.0574648 Ideal
# Count best models
best_model_counts <- table(country_aic_summary$Best_Model)

# Create a bar chart of best models by country
ggplot(country_aic_summary, aes(x = reorder(Country, -AIC_Hybrid), y = AIC_Hybrid, fill = Best_Model)) +
  geom_bar(stat = "identity") +
  geom_text(aes(label = Best_Model), vjust = -0.5, size = 3) +
  labs(title = "Model Comparison Across Countries (Mean Analysis)",
       subtitle = "Hybrid Model AIC Values with Best Model Indicated",
       x = "Country",
       y = "AIC Value (Hybrid Model)",
       fill = "Best Model") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Create plots showing regression results for each country
country_plots <- list()

for(country in countries) {
  if(!is.null(country_results_mean[[country]])) {
    # Create a dataframe for plotting
    plot_data <- data.frame(
      Average = country_results_mean[[country]]$log_means$average,
      Ideal = country_results_mean[[country]]$log_means$ideal,
      Reasonable = country_results_mean[[country]]$log_means$reasonable
    )
    
    # Create plot for this country
    country_plot <- ggplot(plot_data) +
      geom_point(aes(x = Average, y = Reasonable), color = "blue", size = 3) +
      geom_smooth(aes(x = Average, y = Reasonable), method = "lm", se = TRUE, color = "blue", linetype = "solid") +
      geom_point(aes(x = Ideal, y = Reasonable), color = "red", size = 3) +
      geom_smooth(aes(x = Ideal, y = Reasonable), method = "lm", se = TRUE, color = "red", linetype = "dashed") +
      labs(title = paste0("Regression Results for ", country),
           subtitle = paste0("Best model: ", country_results_mean[[country]]$best_model),
           x = "Log Predictor Values",
           y = "Log Reasonable Values") +
      theme_minimal() +
      annotate("text", x = min(plot_data$Average, plot_data$Ideal, na.rm = TRUE), 
               y = max(plot_data$Reasonable, na.rm = TRUE),
               label = paste0("Average R² = ", round(country_results_mean[[country]]$regression_summary$R_squared[1], 3),
                            "\nIdeal R² = ", round(country_results_mean[[country]]$regression_summary$R_squared[2], 3),
                            "\nHybrid R² = ", round(country_results_mean[[country]]$regression_summary$R_squared[3], 3)),
               hjust = 0, vjust = 1)
    
    country_plots[[country]] <- country_plot
    print(country_plot)
  }
}
## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

## `geom_smooth()` using formula = 'y ~ x'
## `geom_smooth()` using formula = 'y ~ x'

# Create plots comparing the regression coefficients across countries
# Extract coefficients for Average and Ideal from the hybrid model
coefficient_data <- data.frame(
  Country = character(),
  Average_Coef = numeric(),
  Average_P = numeric(),
  Ideal_Coef = numeric(),
  Ideal_P = numeric(),
  stringsAsFactors = FALSE
)

for(country in countries) {
  if(!is.null(country_results_mean[[country]])) {
    # Extract coefficients from hybrid model
    model3_coefs <- summary(country_results_mean[[country]]$regression_models$model3)$coefficients
    
    if(nrow(model3_coefs) >= 3) {  # Ensure we have coefficients for both predictors
      coefficient_data <- rbind(coefficient_data, data.frame(
        Country = country,
        Average_Coef = model3_coefs[2, 1],  # Coefficient for Average
        Average_P = model3_coefs[2, 4],     # p-value for Average
        Ideal_Coef = model3_coefs[3, 1],    # Coefficient for Ideal
        Ideal_P = model3_coefs[3, 4],       # p-value for Ideal
        stringsAsFactors = FALSE
      ))
    }
  }
}

# Format p-values
coefficient_data$Average_Sig <- ifelse(coefficient_data$Average_P < 0.05, "*", "")
coefficient_data$Ideal_Sig <- ifelse(coefficient_data$Ideal_P < 0.05, "*", "")

# Create long format for coefficient plotting
coef_long <- coefficient_data %>%
  select(Country, Average_Coef, Ideal_Coef) %>%
  pivot_longer(cols = c(Average_Coef, Ideal_Coef),
               names_to = "Predictor",
               values_to = "Coefficient") %>%
  mutate(Predictor = gsub("_Coef", "", Predictor))

# Create coefficient plot
ggplot(coef_long, aes(x = Country, y = Coefficient, fill = Predictor)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_text(aes(label = ifelse(Predictor == "Average", 
                              coefficient_data$Average_Sig[match(Country, coefficient_data$Country)],
                              coefficient_data$Ideal_Sig[match(Country, coefficient_data$Country)])),
            position = position_dodge(width = 0.9), vjust = -0.5) +
  labs(title = "Regression Coefficients by Country (Hybrid Model)",
       subtitle = "* indicates p < 0.05",
       x = "Country",
       y = "Coefficient Value") +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

# Save results
analysis_5_results <- list(
  country_results = country_results_mean,
  aic_summary = country_aic_summary,
  coefficient_data = coefficient_data
)

Analysis 6: Overall results, intermediacy [following Bear & Knobe 2016] BY COUNTRY

# Create storage for country-specific intermediacy results
country_intermediacy_results <- list()

# Function to analyze intermediacy for a single country
analyze_country_intermediacy <- function(country) {
  cat("\n========== Analyzing Intermediacy for:", country, "==========\n")
  
  # We'll use the filtered mean data from Analysis 5
  # If country data wasn't successfully processed in Analysis 5, return NULL
  if(is.null(country_results_mean[[country]])) {
    cat("No valid data for", country, "\n")
    return(NULL)
  }
  
  # Extract the mean values
  country_means_average <- country_results_mean[[country]]$means$average
  country_means_ideal <- country_results_mean[[country]]$means$ideal
  country_means_reasonable <- country_results_mean[[country]]$means$reasonable
  
  # Step 3: Check if reasonable is on the "ideal side" of average
  ideal_side_vector <- mapply(is_both_sides,
                             country_means_average,
                             country_means_ideal,
                             country_means_reasonable)
  
  count_ideal_side <- sum(ideal_side_vector)
  
  # Conduct binomial test
  binomial_result_ideal <- binom.test(count_ideal_side, length(ideal_side_vector), 
                                     p = 0.5, alternative = "greater")
  
  # Step 4: Check if reasonable is on the "average side" of ideal
  average_side_vector <- mapply(is_average_side,
                               country_means_average,
                               country_means_ideal,
                               country_means_reasonable)
  
  count_average_side <- sum(average_side_vector)
  
  # Conduct binomial test
  binomial_result_avg_side <- binom.test(count_average_side, length(average_side_vector), 
                                        p = 0.5, alternative = "greater")
  
  # Step 5: Check if reasonable is both on the ideal side of average and average side of ideal
  both_sides_vector <- mapply(is_both_sides,
                             country_means_average,
                             country_means_ideal,
                             country_means_reasonable)
  
  count_both_sides <- sum(both_sides_vector)
  
  # Conduct binomial test with 1/3 as the chance rate
  binomial_result_both_sides <- binom.test(count_both_sides, length(both_sides_vector), 
                                          p = 1/3, alternative = "two.sided")
  
  # Create a summary table
  intermediacy_summary <- data.frame(
    Test = c("On Ideal Side of Average", "On Average Side of Ideal", "Between Average and Ideal"),
    Count = c(count_ideal_side, count_average_side, count_both_sides),
    Total = rep(length(ideal_side_vector), 3),
    Proportion = c(count_ideal_side/length(ideal_side_vector), 
                  count_average_side/length(average_side_vector), 
                  count_both_sides/length(both_sides_vector)),
    p_value = c(binomial_result_ideal$p.value, 
               binomial_result_avg_side$p.value, 
               binomial_result_both_sides$p.value)
  )
  
  # Format proportions and p-values
  intermediacy_summary$Proportion <- scales::percent(intermediacy_summary$Proportion)
  intermediacy_summary$p_value <- sapply(intermediacy_summary$p_value, format_pvalue)
  
  cat("\nIntermediary Analysis for", country, ":\n")
  print(intermediacy_summary)
  
  # Return results
  return(list(
    country = country,
    ideal_side = list(
      vector = ideal_side_vector,
      count = count_ideal_side,
      proportion = count_ideal_side/length(ideal_side_vector),
      binomial_test = binomial_result_ideal
    ),
    average_side = list(
      vector = average_side_vector,
      count = count_average_side,
      proportion = count_average_side/length(average_side_vector),
      binomial_test = binomial_result_avg_side
    ),
    both_sides = list(
      vector = both_sides_vector,
      count = count_both_sides,
      proportion = count_both_sides/length(both_sides_vector),
      binomial_test = binomial_result_both_sides
    ),
    summary = intermediacy_summary
  ))
}
# Analyze intermediacy for each country
for(country in countries) {
  country_intermediacy_results[[country]] <- analyze_country_intermediacy(country)
}
## 
## ========== Analyzing Intermediacy for: USA ==========
## 
## Intermediary Analysis for USA :
##                        Test Count Total Proportion p_value
## 1  On Ideal Side of Average    10    33        30%   0.993
## 2  On Average Side of Ideal    10    33        30%   0.993
## 3 Between Average and Ideal    10    33        30%   0.854
## 
## ========== Analyzing Intermediacy for: Colombia ==========
## 
## Intermediary Analysis for Colombia :
##                        Test Count Total Proportion p_value
## 1  On Ideal Side of Average     7    33      21.2%       1
## 2  On Average Side of Ideal    10    33      30.3%   0.993
## 3 Between Average and Ideal     7    33      21.2%   0.195
## 
## ========== Analyzing Intermediacy for: Brazil ==========
## 
## Intermediary Analysis for Brazil :
##                        Test Count Total Proportion p_value
## 1  On Ideal Side of Average     6    33      18.2%       1
## 2  On Average Side of Ideal     7    33      21.2%       1
## 3 Between Average and Ideal     6    33      18.2%   0.067
## 
## ========== Analyzing Intermediacy for: Germany ==========
## 
## Intermediary Analysis for Germany :
##                        Test Count Total Proportion p_value
## 1  On Ideal Side of Average    12    33      36.4%    0.96
## 2  On Average Side of Ideal    11    33      33.3%   0.982
## 3 Between Average and Ideal    12    33      36.4%   0.714
## 
## ========== Analyzing Intermediacy for: India ==========
## 
## Intermediary Analysis for India :
##                        Test Count Total Proportion  p_value
## 1  On Ideal Side of Average     4    33        12%        1
## 2  On Average Side of Ideal    12    33        36%     0.96
## 3 Between Average and Ideal     4    33        12% 0.009 **
## 
## ========== Analyzing Intermediacy for: Lithuania ==========
## 
## Intermediary Analysis for Lithuania :
##                        Test Count Total Proportion p_value
## 1  On Ideal Side of Average     9    33      27.3%   0.998
## 2  On Average Side of Ideal    12    33      36.4%    0.96
## 3 Between Average and Ideal     9    33      27.3%    0.58
## 
## ========== Analyzing Intermediacy for: Italy ==========
## 
## Intermediary Analysis for Italy :
##                        Test Count Total Proportion p_value
## 1  On Ideal Side of Average    15    33        45%   0.757
## 2  On Average Side of Ideal     6    33        18%       1
## 3 Between Average and Ideal    15    33        45%   0.143
## 
## ========== Analyzing Intermediacy for: Poland ==========
## 
## Intermediary Analysis for Poland :
##                        Test Count Total Proportion p_value
## 1  On Ideal Side of Average    13    33      39.4%   0.919
## 2  On Average Side of Ideal    10    33      30.3%   0.993
## 3 Between Average and Ideal    13    33      39.4%   0.464
## 
## ========== Analyzing Intermediacy for: Spain ==========
## 
## Intermediary Analysis for Spain :
##                        Test Count Total Proportion p_value
## 1  On Ideal Side of Average    11    33      33.3%   0.982
## 2  On Average Side of Ideal     9    33      27.3%   0.998
## 3 Between Average and Ideal    11    33      33.3%       1
## 
## ========== Analyzing Intermediacy for: Netherlands ==========
## 
## Intermediary Analysis for Netherlands :
##                        Test Count Total Proportion p_value
## 1  On Ideal Side of Average    11    33      33.3%   0.982
## 2  On Average Side of Ideal     9    33      27.3%   0.998
## 3 Between Average and Ideal    11    33      33.3%       1
# Create summary table for intermediacy across countries
intermediacy_by_country <- data.frame(
  Country = character(),
  Ideal_Side_Proportion = numeric(),
  Ideal_Side_P = numeric(),
  Average_Side_Proportion = numeric(),
  Average_Side_P = numeric(),
  Between_Proportion = numeric(),
  Between_P = numeric(),
  stringsAsFactors = FALSE
)

for(country in countries) {
  if(!is.null(country_intermediacy_results[[country]])) {
    intermediacy_by_country <- rbind(intermediacy_by_country, data.frame(
      Country = country,
      Ideal_Side_Proportion = country_intermediacy_results[[country]]$ideal_side$proportion,
      Ideal_Side_P = country_intermediacy_results[[country]]$ideal_side$binomial_test$p.value,
      Average_Side_Proportion = country_intermediacy_results[[country]]$average_side$proportion,
      Average_Side_P = country_intermediacy_results[[country]]$average_side$binomial_test$p.value,
      Between_Proportion = country_intermediacy_results[[country]]$both_sides$proportion,
      Between_P = country_intermediacy_results[[country]]$both_sides$binomial_test$p.value,
      stringsAsFactors = FALSE
    ))
  }
}

# Format the proportions as percentages
intermediacy_by_country$Ideal_Side_Proportion <- scales::percent(intermediacy_by_country$Ideal_Side_Proportion)
intermediacy_by_country$Average_Side_Proportion <- scales::percent(intermediacy_by_country$Average_Side_Proportion)
intermediacy_by_country$Between_Proportion <- scales::percent(intermediacy_by_country$Between_Proportion)

# Add significance indicators
intermediacy_by_country$Ideal_Side_Sig <- ifelse(intermediacy_by_country$Ideal_Side_P < 0.05, "*", "")
intermediacy_by_country$Average_Side_Sig <- ifelse(intermediacy_by_country$Average_Side_P < 0.05, "*", "")
intermediacy_by_country$Between_Sig <- ifelse(intermediacy_by_country$Between_P < 0.05, "*", "")

# Create display table
display_intermediacy <- intermediacy_by_country %>%
  select(Country, 
         Ideal_Side = Ideal_Side_Proportion, 
         Ideal_Sig = Ideal_Side_Sig,
         Average_Side = Average_Side_Proportion, 
         Average_Sig = Average_Side_Sig,
         Between = Between_Proportion,
         Between_Sig = Between_Sig)

# Display the table
kable(display_intermediacy, 
      caption = "Intermediacy Analysis by Country (Mean Values)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  add_header_above(c(" " = 1, "Ideal Side of Average" = 2, "Average Side of Ideal" = 2, "Between Average and Ideal" = 2))
Intermediacy Analysis by Country (Mean Values)
Ideal Side of Average
Average Side of Ideal
Between Average and Ideal
Country Ideal_Side Ideal_Sig Average_Side Average_Sig Between Between_Sig
USA 30.3% 30.3% 30.3%
Colombia 21.2% 30.3% 21.2%
Brazil 18.2% 21.2% 18.2%
Germany 36.4% 33.3% 36.4%
India 12.1% 36.4% 12.1%
Lithuania 27.3% 36.4% 27.3%
Italy 45.5% 18.2% 45.5%
Poland 39.4% 30.3% 39.4%
Spain 33.3% 27.3% 33.3%
Netherlands 33.3% 27.3% 33.3%
# Create a visualization of the "Between" proportions across countries
between_plot <- ggplot(intermediacy_by_country, aes(x = reorder(Country, -as.numeric(gsub("%", "", Between_Proportion))/100), 
                                                 y = as.numeric(gsub("%", "", Between_Proportion))/100)) +
  geom_bar(stat = "identity", fill = "steelblue") +
  geom_text(aes(label = paste0(Between_Proportion, Between_Sig)), vjust = -0.5) +
  geom_hline(yintercept = 1/3, linetype = "dashed", color = "red") +
  labs(title = "Proportion of Questions where Reasonable is Between Average and Ideal",
       subtitle = "By Country (Mean Analysis), Red line = Chance level (1/3), * = p < 0.05",
       x = "Country",
       y = "Proportion") +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(between_plot)

ggsave("between_proportions_by_country.png", between_plot, width = 10, height = 6, dpi = 300)

# Save the results
analysis_6_results <- list(
  country_results = country_intermediacy_results,
  summary = intermediacy_by_country
)

Analysis 7: Overall results, with median ratings BY COUNTRY

# Create storage for country-specific median results
country_results_median <- list()

# Function to analyze median data for a single country
analyze_country_median <- function(country) {
  cat("\n========== Analyzing Median Data for:", country, "==========\n")
  
  # Step 1: Split data by condition for this country
  country_average <- Data_All_Average %>% filter(Country_name == country)
  country_ideal <- Data_All_Ideal %>% filter(Country_name == country)
  country_reasonable <- Data_All_Reasonable %>% filter(Country_name == country)
  
  # Apply attention check filter (q19 == 15)
  country_average_pass <- country_average %>% filter(q19 == 15)
  country_ideal_pass <- country_ideal %>% filter(q19 == 15)
  country_reasonable_pass <- country_reasonable %>% filter(q19 == 15)
  
  # Print participant counts
  cat("Participants after attention check filter:\n")
  cat("Average condition:", nrow(country_average_pass), "\n")
  cat("Ideal condition:", nrow(country_ideal_pass), "\n")
  cat("Reasonable condition:", nrow(country_reasonable_pass), "\n")
  
  # Convert columns to numeric
  country_average_pass <- country_average_pass %>%
    mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
  
  country_ideal_pass <- country_ideal_pass %>%
    mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
  
  country_reasonable_pass <- country_reasonable_pass %>%
    mutate(across(all_of(question_cols), ~as.numeric(as.character(.))))
  
  # Step 2: Calculate medians (no 3 SD filtering for median analysis)
  country_medians_average <- sapply(country_average_pass[, question_cols], median, na.rm = TRUE)
  country_medians_ideal <- sapply(country_ideal_pass[, question_cols], median, na.rm = TRUE)
  country_medians_reasonable <- sapply(country_reasonable_pass[, question_cols], median, na.rm = TRUE)
  
  # Step 3: Convert to log scale
  country_log_medians_average <- safe_log(country_medians_average)
  country_log_medians_ideal <- safe_log(country_medians_ideal)
  country_log_medians_reasonable <- safe_log(country_medians_reasonable)
  
  # Step 4: Run regression models
  country_dataforAIC_median <- data.frame(
    Reasonable = country_log_medians_reasonable,
    Average = country_log_medians_average,
    Ideal = country_log_medians_ideal
  )
  
  # Skip regression if we have too few data points
  if(nrow(country_dataforAIC_median) < 5) {
    cat("Too few data points for regression analysis in", country, "\n")
    return(NULL)
  }
  
  # Model I: Average predicts reasonable
  country_model1_median <- lm(Reasonable ~ Average, data = country_dataforAIC_median)
  country_aic1_median <- AIC(country_model1_median)
  country_r2_1_median <- summary(country_model1_median)$r.squared
  
  # Model II: Ideal predicts reasonable
  country_model2_median <- lm(Reasonable ~ Ideal, data = country_dataforAIC_median)
  country_aic2_median <- AIC(country_model2_median)
  country_r2_2_median <- summary(country_model2_median)$r.squared
  
  # Model III: Both average and ideal predict reasonable
  country_model3_median <- lm(Reasonable ~ Average + Ideal, data = country_dataforAIC_median)
  country_aic3_median <- AIC(country_model3_median)
  country_r2_3_median <- summary(country_model3_median)$r.squared
  
  # Create regression summary
  country_reg_summary_median <- data.frame(
    Model = c("Average predicts Reasonable", 
              "Ideal predicts Reasonable", 
              "Average + Ideal predict Reasonable"),
    AIC = c(country_aic1_median, country_aic2_median, country_aic3_median),
    R_squared = c(country_r2_1_median, country_r2_2_median, country_r2_3_median)
  )
  
  # Determine which model has the lowest AIC (best fit)
  best_model_idx <- which.min(c(country_aic1_median, country_aic2_median, country_aic3_median))
  best_model_name <- c("Average", "Ideal", "Hybrid")[best_model_idx]
  
  cat("\nRegression Results for", country, "(Median Analysis):\n")
  print(country_reg_summary_median)
  cat("\nBest model for", country, "is:", best_model_name, "\n")
  
  # Return results for this country
  return(list(
    country = country,
    sample_sizes = list(
      after_attention = c(nrow(country_average_pass), nrow(country_ideal_pass), nrow(country_reasonable_pass))
    ),
    medians = list(
      average = country_medians_average,
      ideal = country_medians_ideal,
      reasonable = country_medians_reasonable
    ),
    log_medians = list(
      average = country_log_medians_average,
      ideal = country_log_medians_ideal,
      reasonable = country_log_medians_reasonable
    ),
    regression_models = list(
      model1 = country_model1_median,
      model2 = country_model2_median,
      model3 = country_model3_median
    ),
    regression_summary = country_reg_summary_median,
    aic_values = c(Model1 = country_aic1_median, Model2 = country_aic2_median, Model3 = country_aic3_median),
    best_model = best_model_name
  ))
}
# Analyze median data for each country
for(country in countries) {
  country_results_median[[country]] <- analyze_country_median(country)
}
## 
## ========== Analyzing Median Data for: USA ==========
## Participants after attention check filter:
## Average condition: 73 
## Ideal condition: 72 
## Reasonable condition: 69 
## 
## Regression Results for USA (Median Analysis):
##                                Model      AIC   R_squared
## 1        Average predicts Reasonable 182.8055 0.003201958
## 2          Ideal predicts Reasonable 171.6330 0.289488677
## 3 Average + Ideal predict Reasonable 173.0812 0.301269643
## 
## Best model for USA is: Ideal 
## 
## ========== Analyzing Median Data for: Colombia ==========
## Participants after attention check filter:
## Average condition: 89 
## Ideal condition: 84 
## Reasonable condition: 81
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(all_of(question_cols), ~as.numeric(as.character(.)))`.
## Caused by warning:
## ! NAs introduced by coercion
## 
## Regression Results for Colombia (Median Analysis):
##                                Model      AIC   R_squared
## 1        Average predicts Reasonable 182.6409 0.001256565
## 2          Ideal predicts Reasonable 179.8702 0.081686636
## 3 Average + Ideal predict Reasonable 177.8151 0.187874069
## 
## Best model for Colombia is: Hybrid 
## 
## ========== Analyzing Median Data for: Brazil ==========
## Participants after attention check filter:
## Average condition: 43 
## Ideal condition: 56 
## Reasonable condition: 52 
## 
## Regression Results for Brazil (Median Analysis):
##                                Model      AIC   R_squared
## 1        Average predicts Reasonable 182.9969 0.004584655
## 2          Ideal predicts Reasonable 177.8130 0.149289295
## 3 Average + Ideal predict Reasonable 178.2859 0.187758757
## 
## Best model for Brazil is: Ideal 
## 
## ========== Analyzing Median Data for: Germany ==========
## Participants after attention check filter:
## Average condition: 71 
## Ideal condition: 72 
## Reasonable condition: 72 
## 
## Regression Results for Germany (Median Analysis):
##                                Model      AIC    R_squared
## 1        Average predicts Reasonable 183.3067 0.0005518197
## 2          Ideal predicts Reasonable 174.6719 0.2306542899
## 3 Average + Ideal predict Reasonable 175.6038 0.2551562716
## 
## Best model for Germany is: Ideal 
## 
## ========== Analyzing Median Data for: India ==========
## Participants after attention check filter:
## Average condition: 74 
## Ideal condition: 71 
## Reasonable condition: 56 
## 
## Regression Results for India (Median Analysis):
##                                Model      AIC  R_squared
## 1        Average predicts Reasonable 179.6270 0.02412332
## 2          Ideal predicts Reasonable 179.2726 0.03454875
## 3 Average + Ideal predict Reasonable 181.1041 0.03946585
## 
## Best model for India is: Ideal 
## 
## ========== Analyzing Median Data for: Lithuania ==========
## Participants after attention check filter:
## Average condition: 76 
## Ideal condition: 76 
## Reasonable condition: 77 
## 
## Regression Results for Lithuania (Median Analysis):
##                                Model      AIC   R_squared
## 1        Average predicts Reasonable 182.6574 0.002627272
## 2          Ideal predicts Reasonable 178.1545 0.129842497
## 3 Average + Ideal predict Reasonable 178.0779 0.182913172
## 
## Best model for Lithuania is: Hybrid 
## 
## ========== Analyzing Median Data for: Italy ==========
## Participants after attention check filter:
## Average condition: 73 
## Ideal condition: 83 
## Reasonable condition: 75 
## 
## Regression Results for Italy (Median Analysis):
##                                Model      AIC    R_squared
## 1        Average predicts Reasonable 183.3033 0.0001271376
## 2          Ideal predicts Reasonable 178.9606 0.1234184008
## 3 Average + Ideal predict Reasonable 180.7836 0.1281056056
## 
## Best model for Italy is: Ideal 
## 
## ========== Analyzing Median Data for: Poland ==========
## Participants after attention check filter:
## Average condition: 90 
## Ideal condition: 87 
## Reasonable condition: 90
## Warning: There was 1 warning in `mutate()`.
## ℹ In argument: `across(all_of(question_cols), ~as.numeric(as.character(.)))`.
## Caused by warning:
## ! NAs introduced by coercion
## 
## Regression Results for Poland (Median Analysis):
##                                Model      AIC    R_squared
## 1        Average predicts Reasonable 183.5047 8.151568e-05
## 2          Ideal predicts Reasonable 179.0232 1.270572e-01
## 3 Average + Ideal predict Reasonable 180.5577 1.392848e-01
## 
## Best model for Poland is: Ideal 
## 
## ========== Analyzing Median Data for: Spain ==========
## Participants after attention check filter:
## Average condition: 73 
## Ideal condition: 69 
## Reasonable condition: 72 
## 
## Regression Results for Spain (Median Analysis):
##                                Model      AIC  R_squared
## 1        Average predicts Reasonable 181.8793 0.01049767
## 2          Ideal predicts Reasonable 169.4416 0.32121629
## 3 Average + Ideal predict Reasonable 171.4380 0.32129036
## 
## Best model for Spain is: Ideal 
## 
## ========== Analyzing Median Data for: Netherlands ==========
## Participants after attention check filter:
## Average condition: 131 
## Ideal condition: 130 
## Reasonable condition: 114 
## 
## Regression Results for Netherlands (Median Analysis):
##                                Model      AIC   R_squared
## 1        Average predicts Reasonable 183.4221 0.006706274
## 2          Ideal predicts Reasonable 172.2985 0.290934079
## 3 Average + Ideal predict Reasonable 173.1044 0.316133837
## 
## Best model for Netherlands is: Ideal
# Extract AIC values and best models for all countries (median analysis)
country_aic_summary_median <- data.frame(
  Country = character(),
  AIC_Average = numeric(),
  AIC_Ideal = numeric(),
  AIC_Hybrid = numeric(),
  R2_Average = numeric(),
  R2_Ideal = numeric(),
  R2_Hybrid = numeric(),
  Best_Model = character(),
  stringsAsFactors = FALSE
)

for(country in countries) {
  if(!is.null(country_results_median[[country]])) {
    country_aic_summary_median <- rbind(country_aic_summary_median, data.frame(
      Country = country,
      AIC_Average = country_results_median[[country]]$aic_values["Model1"],
      AIC_Ideal = country_results_median[[country]]$aic_values["Model2"],
      AIC_Hybrid = country_results_median[[country]]$aic_values["Model3"],
      R2_Average = country_results_median[[country]]$regression_summary$R_squared[1],
      R2_Ideal = country_results_median[[country]]$regression_summary$R_squared[2],
      R2_Hybrid = country_results_median[[country]]$regression_summary$R_squared[3],
      Best_Model = country_results_median[[country]]$best_model,
      stringsAsFactors = FALSE
    ))
  }
}

# Display the summary table
kable(country_aic_summary_median, 
      caption = "AIC Values and Best Models by Country (Median Analysis)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  column_spec(8, background = ifelse(country_aic_summary_median$Best_Model == "Hybrid", "#e6f7ff", 
                                   ifelse(country_aic_summary_median$Best_Model == "Average", "#e6ffe6", "#ffe6e6")))
AIC Values and Best Models by Country (Median Analysis)
Country AIC_Average AIC_Ideal AIC_Hybrid R2_Average R2_Ideal R2_Hybrid Best_Model
Model1 USA 182.8055 171.6330 173.0812 0.0032020 0.2894887 0.3012696 Ideal
Model11 Colombia 182.6409 179.8702 177.8151 0.0012566 0.0816866 0.1878741 Hybrid
Model12 Brazil 182.9969 177.8130 178.2859 0.0045847 0.1492893 0.1877588 Ideal
Model13 Germany 183.3067 174.6719 175.6038 0.0005518 0.2306543 0.2551563 Ideal
Model14 India 179.6270 179.2726 181.1041 0.0241233 0.0345487 0.0394658 Ideal
Model15 Lithuania 182.6574 178.1545 178.0779 0.0026273 0.1298425 0.1829132 Hybrid
Model16 Italy 183.3033 178.9606 180.7836 0.0001271 0.1234184 0.1281056 Ideal
Model17 Poland 183.5047 179.0232 180.5577 0.0000815 0.1270572 0.1392848 Ideal
Model18 Spain 181.8793 169.4416 171.4380 0.0104977 0.3212163 0.3212904 Ideal
Model19 Netherlands 183.4221 172.2985 173.1044 0.0067063 0.2909341 0.3161338 Ideal
# Compare mean and median analysis
comparison_mean_median <- data.frame(
  Country = character(),
  Mean_Best_Model = character(),
  Median_Best_Model = character(),
  Mean_Hybrid_AIC = numeric(),
  Median_Hybrid_AIC = numeric(),
  Mean_Hybrid_R2 = numeric(),
  Median_Hybrid_R2 = numeric(),
  stringsAsFactors = FALSE
)

for(country in countries) {
  if(!is.null(country_results_mean[[country]]) && !is.null(country_results_median[[country]])) {
    comparison_mean_median <- rbind(comparison_mean_median, data.frame(
      Country = country,
      Mean_Best_Model = country_results_mean[[country]]$best_model,
      Median_Best_Model = country_results_median[[country]]$best_model,
      Mean_Hybrid_AIC = country_results_mean[[country]]$aic_values["Model3"],
      Median_Hybrid_AIC = country_results_median[[country]]$aic_values["Model3"],
      Mean_Hybrid_R2 = country_results_mean[[country]]$regression_summary$R_squared[3],
      Median_Hybrid_R2 = country_results_median[[country]]$regression_summary$R_squared[3],
      stringsAsFactors = FALSE
    ))
  }
}

# Display the comparison table
kable(comparison_mean_median, 
      caption = "Comparison of Mean and Median Analyses by Country") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  column_spec(2, background = ifelse(comparison_mean_median$Mean_Best_Model == "Hybrid", "#e6f7ff", 
                                   ifelse(comparison_mean_median$Mean_Best_Model == "Average", "#e6ffe6", "#ffe6e6"))) %>%
  column_spec(3, background = ifelse(comparison_mean_median$Median_Best_Model == "Hybrid", "#e6f7ff", 
                                   ifelse(comparison_mean_median$Median_Best_Model == "Average", "#e6ffe6", "#ffe6e6")))
Comparison of Mean and Median Analyses by Country
Country Mean_Best_Model Median_Best_Model Mean_Hybrid_AIC Median_Hybrid_AIC Mean_Hybrid_R2 Median_Hybrid_R2
Model3 USA Hybrid Ideal 179.3529 173.0812 0.1972187 0.3012696
Model31 Colombia Hybrid Hybrid 180.3834 177.8151 0.1622269 0.1878741
Model32 Brazil Hybrid Ideal 182.4260 178.2859 0.0881237 0.1877588
Model33 Germany Ideal Ideal 180.0991 175.6038 0.1613116 0.2551563
Model34 India Average Ideal 183.6439 181.1041 0.0032863 0.0394658
Model35 Lithuania Hybrid Hybrid 179.5624 178.0779 0.1671046 0.1829132
Model36 Italy Hybrid Ideal 181.2000 180.7836 0.1221354 0.1281056
Model37 Poland Hybrid Ideal 179.8445 180.5577 0.1579659 0.1392848
Model38 Spain Hybrid Ideal 176.0171 171.4380 0.2402000 0.3212904
Model39 Netherlands Ideal Ideal 183.9397 173.1044 0.0574648 0.3161338
# Save results
analysis_7_results <- list(
  country_results = country_results_median,
  aic_summary = country_aic_summary_median,
  comparison_with_means = comparison_mean_median
)

Analysis 8: Overall results, intermediacy, with median ratings BY COUNTRY

# Create storage for country-specific median intermediacy results
country_intermediacy_results_median <- list()

# Function to analyze median intermediacy for a single country
analyze_country_intermediacy_median <- function(country) {
  cat("\n========== Analyzing Median Intermediacy for:", country, "==========\n")
  
  # Use the median data from Analysis 7
  # If country data wasn't successfully processed in Analysis 7, return NULL
  if(is.null(country_results_median[[country]])) {
    cat("No valid data for", country, "\n")
    return(NULL)
  }
  
  # Extract the median values
  country_medians_average <- country_results_median[[country]]$medians$average
  country_medians_ideal <- country_results_median[[country]]$medians$ideal
  country_medians_reasonable <- country_results_median[[country]]$medians$reasonable
  
  # Step 3: Check if reasonable is on the "ideal side" of average
  # Note: For median analysis, we treat equal values as being on the predicted side
  ideal_side_vector_median <- mapply(is_ideal_side_median,
                                    country_medians_average,
                                    country_medians_ideal,
                                    country_medians_reasonable)
  
  count_ideal_side_median <- sum(ideal_side_vector_median)
  
  # Conduct binomial test
  binomial_result_ideal_median <- binom.test(count_ideal_side_median, length(ideal_side_vector_median), 
                                           p = 0.5, alternative = "greater")
  
  # Step 4: Check if reasonable is on the "average side" of ideal
  average_side_vector_median <- mapply(is_average_side_median,
                                      country_medians_average,
                                      country_medians_ideal,
                                      country_medians_reasonable)
  
  count_average_side_median <- sum(average_side_vector_median)
  
  # Conduct binomial test
  binomial_result_avg_side_median <- binom.test(count_average_side_median, length(average_side_vector_median), 
                                              p = 0.5, alternative = "greater")
  
  # Step 5: Check if reasonable is both on the ideal side of average and average side of ideal
  both_sides_vector_median <- mapply(is_both_sides_median,
                                    country_medians_average,
                                    country_medians_ideal,
                                    country_medians_reasonable)
  
  count_both_sides_median <- sum(both_sides_vector_median)
  
  # Conduct binomial test with 1/3 as the chance rate
  binomial_result_both_sides_median <- binom.test(count_both_sides_median, length(both_sides_vector_median), 
                                                p = 1/3, alternative = "two.sided")
  
  # Create a summary table
  intermediacy_summary_median <- data.frame(
    Test = c("On Ideal Side of Average", "On Average Side of Ideal", "Between Average and Ideal"),
    Count = c(count_ideal_side_median, count_average_side_median, count_both_sides_median),
    Total = rep(length(ideal_side_vector_median), 3),
    Proportion = c(count_ideal_side_median/length(ideal_side_vector_median), 
                  count_average_side_median/length(average_side_vector_median), 
                  count_both_sides_median/length(both_sides_vector_median)),
    p_value = c(binomial_result_ideal_median$p.value, 
               binomial_result_avg_side_median$p.value, 
               binomial_result_both_sides_median$p.value)
  )
  
  # Format proportions and p-values
  intermediacy_summary_median$Proportion <- scales::percent(intermediacy_summary_median$Proportion)
  intermediacy_summary_median$p_value <- sapply(intermediacy_summary_median$p_value, format_pvalue)
  
  cat("\nMedian Intermediary Analysis for", country, ":\n")
  print(intermediacy_summary_median)
  
  # Return results
  return(list(
    country = country,
    ideal_side = list(
      vector = ideal_side_vector_median,
      count = count_ideal_side_median,
      proportion = count_ideal_side_median/length(ideal_side_vector_median),
      binomial_test = binomial_result_ideal_median
    ),
    average_side = list(
      vector = average_side_vector_median,
      count = count_average_side_median,
      proportion = count_average_side_median/length(average_side_vector_median),
      binomial_test = binomial_result_avg_side_median
    ),
    both_sides = list(
      vector = both_sides_vector_median,
      count = count_both_sides_median,
      proportion = count_both_sides_median/length(both_sides_vector_median),
      binomial_test = binomial_result_both_sides_median
    ),
    summary = intermediacy_summary_median
  ))
}
# Analyze median intermediacy for each country
for(country in countries) {
  country_intermediacy_results_median[[country]] <- analyze_country_intermediacy_median(country)
}
## 
## ========== Analyzing Median Intermediacy for: USA ==========
## 
## Median Intermediary Analysis for USA :
##                        Test Count Total Proportion     p_value
## 1  On Ideal Side of Average    26    33      78.8% < 0.001 ***
## 2  On Average Side of Ideal    15    33      45.5%       0.757
## 3 Between Average and Ideal    14    33      42.4%       0.272
## 
## ========== Analyzing Median Intermediacy for: Colombia ==========
## 
## Median Intermediary Analysis for Colombia :
##                        Test Count Total Proportion     p_value
## 1  On Ideal Side of Average    26    33      78.8% < 0.001 ***
## 2  On Average Side of Ideal    11    33      33.3%       0.982
## 3 Between Average and Ideal    13    33      39.4%       0.464
## 
## ========== Analyzing Median Intermediacy for: Brazil ==========
## 
## Median Intermediary Analysis for Brazil :
##                        Test Count Total Proportion     p_value
## 1  On Ideal Side of Average    26    33        79% < 0.001 ***
## 2  On Average Side of Ideal     8    33        24%       0.999
## 3 Between Average and Ideal    15    33        45%       0.143
## 
## ========== Analyzing Median Intermediacy for: Germany ==========
## 
## Median Intermediary Analysis for Germany :
##                        Test Count Total Proportion  p_value
## 1  On Ideal Side of Average    25    33        76% 0.002 **
## 2  On Average Side of Ideal    13    33        39%    0.919
## 3 Between Average and Ideal    18    33        55%  0.015 *
## 
## ========== Analyzing Median Intermediacy for: India ==========
## 
## Median Intermediary Analysis for India :
##                        Test Count Total Proportion  p_value
## 1  On Ideal Side of Average    23    33        70%  0.018 *
## 2  On Average Side of Ideal    18    33        55%    0.364
## 3 Between Average and Ideal     3    33         9% 0.002 **
## 
## ========== Analyzing Median Intermediacy for: Lithuania ==========
## 
## Median Intermediary Analysis for Lithuania :
##                        Test Count Total Proportion     p_value
## 1  On Ideal Side of Average    27    33      81.8% < 0.001 ***
## 2  On Average Side of Ideal    13    33      39.4%       0.919
## 3 Between Average and Ideal    15    33      45.5%       0.143
## 
## ========== Analyzing Median Intermediacy for: Italy ==========
## 
## Median Intermediary Analysis for Italy :
##                        Test Count Total Proportion  p_value
## 1  On Ideal Side of Average    24    33        73% 0.007 **
## 2  On Average Side of Ideal    10    33        30%    0.993
## 3 Between Average and Ideal    17    33        52%   0.04 *
## 
## ========== Analyzing Median Intermediacy for: Poland ==========
## 
## Median Intermediary Analysis for Poland :
##                        Test Count Total Proportion p_value
## 1  On Ideal Side of Average    23    33      69.7% 0.018 *
## 2  On Average Side of Ideal    14    33      42.4%   0.852
## 3 Between Average and Ideal    15    33      45.5%   0.143
## 
## ========== Analyzing Median Intermediacy for: Spain ==========
## 
## Median Intermediary Analysis for Spain :
##                        Test Count Total Proportion     p_value
## 1  On Ideal Side of Average    26    33        79% < 0.001 ***
## 2  On Average Side of Ideal    12    33        36%        0.96
## 3 Between Average and Ideal    16    33        48%       0.094
## 
## ========== Analyzing Median Intermediacy for: Netherlands ==========
## 
## Median Intermediary Analysis for Netherlands :
##                        Test Count Total Proportion  p_value
## 1  On Ideal Side of Average    24    33      72.7% 0.007 **
## 2  On Average Side of Ideal    13    33      39.4%    0.919
## 3 Between Average and Ideal    15    33      45.5%    0.143
# Create summary table for median intermediacy across countries
intermediacy_by_country_median <- data.frame(
  Country = character(),
  Ideal_Side_Proportion = numeric(),
  Ideal_Side_P = numeric(),
  Average_Side_Proportion = numeric(),
  Average_Side_P = numeric(),
  Between_Proportion = numeric(),
  Between_P = numeric(),
  stringsAsFactors = FALSE
)

for(country in countries) {
  if(!is.null(country_intermediacy_results_median[[country]])) {
    intermediacy_by_country_median <- rbind(intermediacy_by_country_median, data.frame(
      Country = country,
      Ideal_Side_Proportion = country_intermediacy_results_median[[country]]$ideal_side$proportion,
      Ideal_Side_P = country_intermediacy_results_median[[country]]$ideal_side$binomial_test$p.value,
      Average_Side_Proportion = country_intermediacy_results_median[[country]]$average_side$proportion,
      Average_Side_P = country_intermediacy_results_median[[country]]$average_side$binomial_test$p.value,
      Between_Proportion = country_intermediacy_results_median[[country]]$both_sides$proportion,
      Between_P = country_intermediacy_results_median[[country]]$both_sides$binomial_test$p.value,
      stringsAsFactors = FALSE
    ))
  }
}

# Format the proportions as percentages
intermediacy_by_country_median$Ideal_Side_Proportion <- scales::percent(intermediacy_by_country_median$Ideal_Side_Proportion)
intermediacy_by_country_median$Average_Side_Proportion <- scales::percent(intermediacy_by_country_median$Average_Side_Proportion)
intermediacy_by_country_median$Between_Proportion <- scales::percent(intermediacy_by_country_median$Between_Proportion)

# Add significance indicators
intermediacy_by_country_median$Ideal_Side_Sig <- ifelse(intermediacy_by_country_median$Ideal_Side_P < 0.05, "*", "")
intermediacy_by_country_median$Average_Side_Sig <- ifelse(intermediacy_by_country_median$Average_Side_P < 0.05, "*", "")
intermediacy_by_country_median$Between_Sig <- ifelse(intermediacy_by_country_median$Between_P < 0.05, "*", "")

# Create display table
display_intermediacy_median <- intermediacy_by_country_median %>%
  select(Country, 
         Ideal_Side = Ideal_Side_Proportion, 
         Ideal_Sig = Ideal_Side_Sig,
         Average_Side = Average_Side_Proportion, 
         Average_Sig = Average_Side_Sig,
         Between = Between_Proportion,
         Between_Sig = Between_Sig)

# Display the table
kable(display_intermediacy_median, 
      caption = "Intermediacy Analysis by Country (Median Values)") %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"), full_width = F) %>%
  add_header_above(c(" " = 1, "Ideal Side of Average" = 2, "Average Side of Ideal" = 2, "Between Average and Ideal" = 2))
Intermediacy Analysis by Country (Median Values)
Ideal Side of Average
Average Side of Ideal
Between Average and Ideal
Country Ideal_Side Ideal_Sig Average_Side Average_Sig Between Between_Sig
USA 78.8%
45.5% 42.4%
Colombia 78.8%
33.3% 39.4%
Brazil 78.8%
24.2% 45.5%
Germany 75.8%
39.4% 54.5%
India 69.7%
54.5% 9.1%
Lithuania 81.8%
39.4% 45.5%
Italy 72.7%
30.3% 51.5%
Poland 69.7%
42.4% 45.5%
Spain 78.8%
36.4% 48.5%
Netherlands 72.7%
39.4% 45.5%
# Compare mean and median intermediacy results
comparison_intermediacy_mean_median <- data.frame(
  Country = character(),
  Mean_Ideal_Side = character(),
  Median_Ideal_Side = character(),
  Mean_Average_Side = character(),
  Median_Average_Side = character(),
  Mean_Between = character(),
  Median_Between = character(),
  stringsAsFactors = FALSE
)

# Create a visualization comparing "Between" proportions for mean vs median analyses
between_data_long <- data.frame(
  Country = rep(intermediacy_by_country$Country, 2),
  Analysis_Type = c(rep("Mean", nrow(intermediacy_by_country)), rep("Median", nrow(intermediacy_by_country_median))),
  Between_Proportion = c(as.numeric(gsub("%", "", intermediacy_by_country$Between_Proportion))/100,
                         as.numeric(gsub("%", "", intermediacy_by_country_median$Between_Proportion))/100),
  Significant = c(intermediacy_by_country$Between_P < 0.05, 
                  intermediacy_by_country_median$Between_P < 0.05)
)

# Plot the comparison
between_comparison_plot <- ggplot(between_data_long, 
                                aes(x = reorder(Country, -Between_Proportion), 
                                    y = Between_Proportion, 
                                    fill = Analysis_Type)) +
  geom_bar(stat = "identity", position = position_dodge()) +
  geom_text(aes(label = ifelse(Significant, "*", "")), 
            position = position_dodge(width = 0.9), vjust = -0.5) +
  geom_hline(yintercept = 1/3, linetype = "dashed", color = "red") +
  labs(title = "Proportion of Questions where Reasonable is Between Average and Ideal",
       subtitle = "Comparison of Mean vs Median Analysis by Country, Red line = Chance level (1/3), * = p < 0.05",
       x = "Country",
       y = "Proportion") +
  scale_y_continuous(labels = scales::percent) +
  theme_minimal() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

print(between_comparison_plot)

ggsave("between_proportions_comparison.png", between_comparison_plot, width = 12, height = 6, dpi = 300)

# Save the results
analysis_8_results <- list(
  country_results = country_intermediacy_results_median,
  summary = intermediacy_by_country_median,
  comparison_with_means = comparison_intermediacy_mean_median
)